From 1d257dc5f553d67fd1a1dff0e3a13bf35da69de8 Mon Sep 17 00:00:00 2001 From: Sigilante Date: Thu, 29 Feb 2024 16:21:59 -0600 Subject: [PATCH] Add Urbit headers and start adding rounding modes. --- README.md | 3 +- include/allocate.h | 746 ++++++++++++++++++++++++++++++++++++++++ include/error.h | 38 ++ include/events.h | 128 +++++++ include/hashtable.h | 192 +++++++++++ include/imprison.h | 170 +++++++++ include/jets.h | 317 +++++++++++++++++ include/log.h | 24 ++ include/manage.h | 201 +++++++++++ include/nock.h | 144 ++++++++ include/noun.h | 26 ++ include/options.h | 57 +++ include/retrieve.h | 538 +++++++++++++++++++++++++++++ include/serial.h | 137 ++++++++ include/softblas.h | 156 +++++---- include/trace.h | 187 ++++++++++ include/types.h | 64 ++++ include/urth.h | 44 +++ include/version.h | 28 ++ include/vortex.h | 146 ++++++++ include/xtract.h | 154 +++++++++ include/zave.h | 68 ++++ src/blas/level1/daxpy.c | 3 +- src/blas/level1/dcopy.c | 3 +- src/blas/level1/ddot.c | 3 +- src/blas/level1/dnrm2.c | 3 +- src/blas/level1/dscal.c | 3 +- src/blas/level1/dswap.c | 3 +- src/blas/level1/hasum.c | 3 +- src/blas/level1/haxpy.c | 3 +- src/blas/level1/hcopy.c | 3 +- src/blas/level1/hdot.c | 3 +- src/blas/level1/hnrm2.c | 3 +- src/blas/level1/hrot.c | 1 + src/blas/level1/hscal.c | 3 +- src/blas/level1/hswap.c | 3 +- 36 files changed, 3532 insertions(+), 76 deletions(-) create mode 100644 include/allocate.h create mode 100644 include/error.h create mode 100644 include/events.h create mode 100644 include/hashtable.h create mode 100644 include/imprison.h create mode 100644 include/jets.h create mode 100644 include/log.h create mode 100644 include/manage.h create mode 100644 include/nock.h create mode 100644 include/noun.h create mode 100644 include/options.h create mode 100644 include/retrieve.h create mode 100644 include/serial.h create mode 100644 include/trace.h create mode 100644 include/types.h create mode 100644 include/urth.h create mode 100644 include/version.h 
create mode 100644 include/vortex.h create mode 100644 include/xtract.h create mode 100644 include/zave.h diff --git a/README.md b/README.md index f591775..e430e88 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,9 @@ A BLAS/LAPACK implementation using [Berkeley SoftFloat](http://www.jhauser.us/ar Following SoftFloat 3e and requiring a 64-bit OS, all quantities are passed by value. -**Status WIP ~2024.2.28** +**Status WIP ~2024.2.29** +- [ ] Add rounding-mode propagation to fn signatures. - [ ] Complete complex-valued functions. - [ ] Run everything through a linter. diff --git a/include/allocate.h b/include/allocate.h new file mode 100644 index 0000000..b851b64 --- /dev/null +++ b/include/allocate.h @@ -0,0 +1,746 @@ +#ifndef U3_ALLOCATE_H +#define U3_ALLOCATE_H + +#include "error.h" +#include "manage.h" + + /** Constants. + **/ + /* u3a_bits: number of bits in word-addressed pointer. 29 == 2GB. + */ +# define u3a_bits U3_OS_LoomBits /* 30 */ + + /* u3a_vits: number of virtual bits in a noun reference gained via shifting + */ +# define u3a_vits 1 + + /* u3a_walign: references into the loom are guaranteed to be word-aligned to: + */ +# define u3a_walign (1 << u3a_vits) + + /* u3a_balign: u3a_walign in bytes + */ +# define u3a_balign (sizeof(c3_w)*u3a_walign) + + /* u3a_bits_max: max loom bex + */ +# define u3a_bits_max (8 * sizeof(c3_w) + u3a_vits) + + /* u3a_page: number of bits in word-addressed page. 12 == 16K page + */ +# define u3a_page 12ULL + + /* u3a_pages: maximum number of pages in memory. + */ +# define u3a_pages (1ULL << (u3a_bits + u3a_vits - u3a_page) ) + + /* u3a_words: maximum number of words in memory. + */ +# define u3a_words ( 1ULL << (u3a_bits + u3a_vits)) + + /* u3a_bytes: maximum number of bytes in memory. + */ +# define u3a_bytes ((sizeof(c3_w) * u3a_words)) + + /* u3a_cells: number of representable cells. + */ +# define u3a_cells (( u3a_words / u3a_minimum )) + + /* u3a_maximum: maximum loom object size (largest possible atom). 
+ */ +# define u3a_maximum ( u3a_words - (c3_wiseof(u3a_box) + c3_wiseof(u3a_atom) + 1)) + + /* u3a_minimum: minimum loom object size (actual size of a cell). + */ +# define u3a_minimum ((c3_w)( 1 + c3_wiseof(u3a_box) + c3_wiseof(u3a_cell) )) + + /* u3a_fbox_no: number of free lists per size. + */ +# define u3a_fbox_no 27 + + /** Structures. + **/ + /* u3a_atom, u3a_cell: logical atom and cell structures. + */ + typedef struct { + c3_w mug_w; + } u3a_noun; + + typedef struct { + c3_w mug_w; + c3_w len_w; + c3_w buf_w[0]; + } u3a_atom; + + typedef struct { + c3_w mug_w; + u3_noun hed; + u3_noun tel; + } u3a_cell; + + /* u3a_box: classic allocation box. + ** + ** The box size is also stored at the end of the box in classic + ** bad ass malloc style. Hence a box is: + ** + ** --- + ** siz_w + ** use_w + ** user data + ** siz_w + ** --- + ** + ** Do not attempt to adjust this structure! + */ + typedef struct _u3a_box { + c3_w siz_w; // size of this box + c3_w use_w; // reference count; free if 0 +# ifdef U3_MEMORY_DEBUG + c3_w eus_w; // recomputed refcount + c3_w cod_w; // tracing code +# endif + } u3a_box; + + /* u3a_fbox: free node in heap. Sets minimum node size. + */ + typedef struct _u3a_fbox { + u3a_box box_u; + u3p(struct _u3a_fbox) pre_p; + u3p(struct _u3a_fbox) nex_p; + } u3a_fbox; + + /* u3a_jets: jet dashboard + */ + typedef struct _u3a_jets { + u3p(u3h_root) hot_p; // hot state (home road only) + u3p(u3h_root) war_p; // warm state + u3p(u3h_root) cod_p; // cold state + u3p(u3h_root) han_p; // hank cache + u3p(u3h_root) bas_p; // battery hashes + } u3a_jets; + + /* u3a_road: contiguous allocation and execution context. 
+ */ + typedef struct _u3a_road { + u3p(struct _u3a_road) par_p; // parent road + u3p(struct _u3a_road) kid_p; // child road list + u3p(struct _u3a_road) nex_p; // sibling road + + u3p(c3_w) cap_p; // top of transient region + u3p(c3_w) hat_p; // top of durable region + u3p(c3_w) mat_p; // bottom of transient region + u3p(c3_w) rut_p; // bottom of durable region + u3p(c3_w) ear_p; // original cap if kid is live + + c3_w fut_w[32]; // futureproof buffer + + struct { // escape buffer + union { + jmp_buf buf; + c3_w buf_w[256]; // futureproofing + }; + } esc; + + struct { // miscellaneous config + c3_w fag_w; // flag bits + } how; // + + struct { // allocation pools + u3p(u3a_fbox) fre_p[u3a_fbox_no]; // heap by node size log + u3p(u3a_fbox) cel_p; // custom cell allocator + c3_w fre_w; // number of free words + c3_w max_w; // maximum allocated + } all; + + u3a_jets jed; // jet dashboard + + struct { // bytecode state + u3p(u3h_root) har_p; // formula->post of bytecode + } byc; + + struct { // scry namespace + u3_noun gul; // (list $+(* (unit (unit)))) now + } ski; + + struct { // trace stack + u3_noun tax; // (list ,*) + u3_noun mer; // emergency buffer to release + } bug; + + struct { // profile stack + c3_d nox_d; // nock steps + c3_d cel_d; // cell allocations + u3_noun don; // (list batt) + u3_noun trace; // (list trace) + u3_noun day; // doss, only in u3H (moveme) + } pro; + + struct { // memoization caches + u3p(u3h_root) har_p; // transient + u3p(u3h_root) per_p; // persistent + } cax; + } u3a_road; + typedef u3a_road u3_road; + + /* u3a_flag: flags for how.fag_w. All arena related. + */ + enum u3a_flag { + u3a_flag_sand = 0x1, // bump allocation (XX not impl) + }; + + /* u3a_pile: stack control, abstracted over road direction. + */ + typedef struct _u3a_pile { + c3_ws mov_ws; + c3_ws off_ws; + u3_post top_p; +#ifdef U3_MEMORY_DEBUG + u3a_road* rod_u; +#endif + } u3a_pile; + + /** Macros. Should be better commented. + **/ + /* In and out of the box. 
+ u3a_boxed -> sizeof u3a_box + allocation size (len_w) + 1 (for storing the redundant size) + u3a_boxto -> the region of memory adjacent to the box. + u3a_botox -> the box adjacent to the region of memory + */ +# define u3a_boxed(len_w) ((len_w) + c3_wiseof(u3a_box) + 1) +# define u3a_boxto(box_v) ( (void *) \ + ( (u3a_box *)(void *)(box_v) + 1 ) ) +# define u3a_botox(tox_v) ( (u3a_box *)(void *)(tox_v) - 1 ) + + /* Inside a noun. + */ + + /* u3a_is_cat(): yes if noun [som] is direct atom. + */ +# define u3a_is_cat(som) (((som) >> 31) ? c3n : c3y) + + /* u3a_is_dog(): yes if noun [som] is indirect noun. + */ +# define u3a_is_dog(som) (((som) >> 31) ? c3y : c3n) + + /* u3a_is_pug(): yes if noun [som] is indirect atom. + */ +# define u3a_is_pug(som) ((0b10 == ((som) >> 30)) ? c3y : c3n) + + /* u3a_is_pom(): yes if noun [som] is indirect cell. + */ +# define u3a_is_pom(som) ((0b11 == ((som) >> 30)) ? c3y : c3n) + + /* u3a_is_atom(): yes if noun [som] is direct atom or indirect atom. + */ +# define u3a_is_atom(som) c3o(u3a_is_cat(som), \ + u3a_is_pug(som)) +# define u3ud(som) u3a_is_atom(som) + + /* u3a_is_cell(): yes if noun [som] is cell. + */ +# define u3a_is_cell(som) u3a_is_pom(som) +# define u3du(som) u3a_is_cell(som) + + /* u3a_h(): get head of cell [som]. Bail if [som] is not cell. + */ +# define u3a_h(som) \ + ( _(u3a_is_cell(som)) \ + ? ( ((u3a_cell *)u3a_to_ptr(som))->hed )\ + : u3m_bail(c3__exit) ) +# define u3h(som) u3a_h(som) + + /* u3a_t(): get tail of cell [som]. Bail if [som] is not cell. + */ +# define u3a_t(som) \ + ( _(u3a_is_cell(som)) \ + ? ( ((u3a_cell *)u3a_to_ptr(som))->tel )\ + : u3m_bail(c3__exit) ) +# define u3t(som) u3a_t(som) + +# define u3to(type, x) ((type *)u3a_into(x)) /* loom offset [x] as a (type *) */ +# define u3tn(type, x) ((x) ? (type*)u3a_into(x) : (void*)NULL) /* NULL when [x] is 0, else as u3to */ + +# define u3of(type, x) (u3a_outa((type*)(x))) /* pointer [x] as a word offset into the loom */ + + /* u3a_is_north(): yes if road [r] is north road. 
+ */ +# define u3a_is_north(r) __((r)->cap_p > (r)->hat_p) + + /* u3a_is_south(): yes if road [r] is south road. + */ +# define u3a_is_south(r) !u3a_is_north((r)) + + /* u3a_open(): words of contiguous free space in road [r] + */ +# define u3a_open(r) ( (c3y == u3a_is_north(r)) \ + ? (c3_w)((r)->cap_p - (r)->hat_p) \ + : (c3_w)((r)->hat_p - (r)->cap_p) ) + + /* u3a_full(): total words in road [r]; + ** u3a_full(r) == u3a_heap(r) + u3a_temp(r) + u3a_open(r) + */ +# define u3a_full(r) ( (c3y == u3a_is_north(r)) \ + ? (c3_w)((r)->mat_p - (r)->rut_p) \ + : (c3_w)((r)->rut_p - (r)->mat_p) ) + + /* u3a_heap(): words of heap in road [r] + */ +# define u3a_heap(r) ( (c3y == u3a_is_north(r)) \ + ? (c3_w)((r)->hat_p - (r)->rut_p) \ + : (c3_w)((r)->rut_p - (r)->hat_p) ) + + /* u3a_temp(): words of stack in road [r] + */ +# define u3a_temp(r) ( (c3y == u3a_is_north(r)) \ + ? (c3_w)((r)->mat_p - (r)->cap_p) \ + : (c3_w)((r)->cap_p - (r)->mat_p) ) + +# define u3a_north_is_senior(r, dog) \ + __((u3a_to_off(dog) < (r)->rut_p) || \ + (u3a_to_off(dog) >= (r)->mat_p)) + +# define u3a_north_is_junior(r, dog) \ + __((u3a_to_off(dog) >= (r)->cap_p) && \ + (u3a_to_off(dog) < (r)->mat_p)) + +# define u3a_north_is_normal(r, dog) \ + c3a(!(u3a_north_is_senior(r, dog)), \ + !(u3a_north_is_junior(r, dog))) + +# define u3a_south_is_senior(r, dog) \ + __((u3a_to_off(dog) < (r)->mat_p) || \ + (u3a_to_off(dog) >= (r)->rut_p)) + +# define u3a_south_is_junior(r, dog) \ + __((u3a_to_off(dog) < (r)->cap_p) && \ + (u3a_to_off(dog) >= (r)->mat_p)) + +# define u3a_south_is_normal(r, dog) \ + c3a(!(u3a_south_is_senior(r, dog)), \ + !(u3a_south_is_junior(r, dog))) + +# define u3a_is_junior(r, som) \ + ( _(u3a_is_cat(som)) \ + ? c3n \ + : _(u3a_is_north(r)) \ + ? u3a_north_is_junior(r, som) \ + : u3a_south_is_junior(r, som) ) + +# define u3a_is_senior(r, som) \ + ( _(u3a_is_cat(som)) \ + ? c3y \ + : _(u3a_is_north(r)) \ + ? 
u3a_north_is_senior(r, som) \ + : u3a_south_is_senior(r, som) ) + +# define u3a_is_mutable(r, som) \ + ( _(u3a_is_atom(som)) \ + ? c3n \ + : _(u3a_is_senior(r, som)) \ + ? c3n \ + : _(u3a_is_junior(r, som)) \ + ? c3n \ + : (u3a_botox(u3a_to_ptr(som))->use_w == 1) \ + ? c3y : c3n ) + +/* like _box_vaal but for rods. Again, probably want to prefix validation + functions at the very least. Maybe they can be defined in their own header. + + ps. while arguably cooler to have this compile to + + do {(void(0));(void(0));} while(0) + + It may be nicer to just wrap an inline function in #ifdef C3DBG guards. You + could even return the then validated road like + + u3a_road f() { + u3a_road rod_u; + ... + return _rod_vaal(rod_u); + } +*/ +# define _rod_vaal(rod_u) \ + do { \ + c3_dessert(((uintptr_t)((u3a_road*)(rod_u))->hat_p \ + & u3a_walign-1) == 0); \ + } while(0) + + + + /** Globals. + **/ + /// Current road (thread-local). + extern u3_road* u3a_Road; +# define u3R u3a_Road + + /* u3_Code: memory code. + */ +#ifdef U3_MEMORY_DEBUG + extern c3_w u3_Code; +#endif + +# define u3_Loom ((c3_w *)(void *)U3_OS_LoomBase) + + /* u3a_into(): convert loom offset [x] into generic pointer. + */ +# define u3a_into(x) ((void *)(u3_Loom + (x))) + + /* u3a_outa(): convert pointer [p] into word offset into loom. + */ +# define u3a_outa(p) ((c3_w *)(void *)(p) - u3_Loom) + + /* u3a_to_off(): mask off bits 30 and 31 from noun [som]. + */ +# define u3a_to_off(som) (((som) & 0x3fffffff) << u3a_vits) + + /* u3a_to_ptr(): convert noun [som] into generic pointer into loom. + */ +# define u3a_to_ptr(som) (u3a_into(u3a_to_off(som))) + + /* u3a_to_wtr(): convert noun [som] into word pointer into loom. + */ +# define u3a_to_wtr(som) ((c3_w *)u3a_to_ptr(som)) + + /** Inline functions. + **/ + /* u3a_to_pug(): set bit 31 of [off]. 
+ */ + inline c3_w u3a_to_pug(c3_w off) { + c3_dessert((off & u3a_walign-1) == 0); + return (off >> u3a_vits) | 0x80000000; + } + + /* u3a_to_pom(): set bits 30 and 31 of [off]. + */ + inline c3_w u3a_to_pom(c3_w off) { + c3_dessert((off & u3a_walign-1) == 0); + return (off >> u3a_vits) | 0xc0000000; + } + + /** road stack. + **/ + /* u3a_drop(): drop a road stack frame per [pil_u]. + */ + inline void + u3a_drop(const u3a_pile* pil_u) + { + u3R->cap_p -= pil_u->mov_ws; + } + + /* u3a_peek(): examine the top of the road stack. + */ + inline void* + u3a_peek(const u3a_pile* pil_u) + { + return u3to(void, (u3R->cap_p + pil_u->off_ws)); + } + + /* u3a_pop(): drop a road stack frame, peek at the new top. + */ + inline void* + u3a_pop(const u3a_pile* pil_u) + { + u3a_drop(pil_u); + return u3a_peek(pil_u); + } + + /* u3a_push(): push a frame onto the road stack, per [pil_u]. + */ + inline void* + u3a_push(const u3a_pile* pil_u) + { + u3R->cap_p += pil_u->mov_ws; + +#ifndef U3_GUARD_PAGE + // !off means we're on a north road + // + if ( !pil_u->off_ws ) { + if( !(u3R->cap_p > u3R->hat_p) ) { + u3m_bail(c3__meme); + } +# ifdef U3_MEMORY_DEBUG + u3_assert( pil_u->top_p >= u3R->cap_p ); +# endif + } + else { + if( !(u3R->cap_p < u3R->hat_p) ) { + u3m_bail(c3__meme); + } +# ifdef U3_MEMORY_DEBUG + u3_assert( pil_u->top_p <= u3R->cap_p ); +# endif + } +#endif /* ifndef U3_GUARD_PAGE */ + +#ifdef U3_MEMORY_DEBUG + u3_assert( pil_u->rod_u == u3R ); +#endif + + return u3a_peek(pil_u); + } + + /* u3a_pile_done(): assert valid upon completion. + */ + inline c3_o + u3a_pile_done(const u3a_pile* pil_u) + { + return (pil_u->top_p == u3R->cap_p) ? c3y : c3n; + } + + /** Functions. + **/ + /** Allocation. + **/ + /* Word-aligned allocation. + */ + /* u3a_walloc(): allocate storage measured in words. + */ + void* + u3a_walloc(c3_w len_w); + + /* u3a_celloc(): allocate a cell. Faster, sometimes. + */ + c3_w* + u3a_celloc(void); + + /* u3a_wfree(): free storage. 
+ */ + void + u3a_wfree(void* lag_v); + + /* u3a_wtrim(): trim storage. + */ + void + u3a_wtrim(void* tox_v, c3_w old_w, c3_w len_w); + + /* u3a_wealloc(): word realloc. + */ + void* + u3a_wealloc(void* lag_v, c3_w len_w); + + /* u3a_pile_prep(): initialize stack control. + */ + void + u3a_pile_prep(u3a_pile* pil_u, c3_w len_w); + + /* C-style aligned allocation - *not* compatible with above. + */ + /* u3a_malloc(): aligned storage measured in bytes. + */ + void* + u3a_malloc(size_t len_i); + + /* u3a_calloc(): aligned storage measured in bytes. + */ + void* + u3a_calloc(size_t num_i, size_t len_i); + + /* u3a_realloc(): aligned realloc in bytes. + */ + void* + u3a_realloc(void* lag_v, size_t len_i); + + /* u3a_free(): free for aligned malloc. + */ + void + u3a_free(void* tox_v); + + /* Reference and arena control. + */ + /* u3a_gain(): gain a reference count in normal space. + */ + u3_weak + u3a_gain(u3_weak som); +# define u3k(som) u3a_gain(som) + + /* u3a_take(): gain, copying juniors. + */ + u3_noun + u3a_take(u3_noun som); + + /* u3a_left(): true of junior if preserved. + */ + c3_o + u3a_left(u3_noun som); + + /* u3a_lose(): lose a reference. + */ + void + u3a_lose(u3_weak som); +# define u3z(som) u3a_lose(som) + + /* u3a_wash(): wash all lazy mugs in subtree. RETAIN. + */ + void + u3a_wash(u3_noun som); + + /* u3a_use(): reference count. + */ + c3_w + u3a_use(u3_noun som); + + /* u3a_wed(): unify noun references. + */ + void + u3a_wed(u3_noun* a, u3_noun* b); + + /* u3a_luse(): check refcount sanity. + */ + void + u3a_luse(u3_noun som); + + /* u3a_mark_ptr(): mark a pointer for gc. Produce size. + */ + c3_w + u3a_mark_ptr(void* ptr_v); + + /* u3a_mark_mptr(): mark a u3_malloc-allocated ptr for gc. + */ + c3_w + u3a_mark_mptr(void* ptr_v); + + /* u3a_mark_noun(): mark a noun for gc. Produce size. + */ + c3_w + u3a_mark_noun(u3_noun som); + + /* u3a_mark_road(): mark ad-hoc persistent road structures. 
+ */ + c3_w + u3a_mark_road(FILE* fil_u); + + /* u3a_reclaim(): clear ad-hoc persistent caches to reclaim memory. + */ + void + u3a_reclaim(void); + + /* u3a_rewrite_compact(): rewrite pointers in ad-hoc persistent road structures. + */ + void + u3a_rewrite_compact(void); + + /* u3a_rewrite_ptr(): mark a pointer as already having been rewritten. + */ + c3_o + u3a_rewrite_ptr(void* ptr_v); + + /* u3a_rewrite_noun(): rewrite a noun for compaction. + */ + void + u3a_rewrite_noun(u3_noun som); + + /* u3a_rewritten(): rewrite a pointer for compaction. + */ + u3_post + u3a_rewritten(u3_post som_p); + + /* u3a_rewritten_noun(): rewritten noun pointer for compaction. + */ + u3_noun + u3a_rewritten_noun(u3_noun som); + + /* u3a_count_noun(): count size of noun. + */ + c3_w + u3a_count_noun(u3_noun som); + + /* u3a_discount_noun(): clean up after counting a noun. + */ + c3_w + u3a_discount_noun(u3_noun som); + + /* u3a_count_ptr(): count a pointer for gc. Produce size. */ + c3_w + u3a_count_ptr(void* ptr_v); + + /* u3a_discount_ptr(): discount a pointer for gc. Produce size. */ + c3_w + u3a_discount_ptr(void* ptr_v); + + /* u3a_idle(): measure free-lists in [rod_u] + */ + c3_w + u3a_idle(u3a_road* rod_u); + + /* u3a_ream(): ream free-lists. + */ + void + u3a_ream(void); + + /* u3a_sweep(): sweep a fully marked road. + */ + c3_w + u3a_sweep(void); + + /* u3a_pack_seek(): sweep the heap, modifying boxes to record new addresses. + */ + void + u3a_pack_seek(u3a_road* rod_u); + + /* u3a_pack_move(): sweep the heap, moving boxes to new addresses. + */ + void + u3a_pack_move(u3a_road* rod_u); + + /* u3a_sane(): check allocator sanity. + */ + void + u3a_sane(void); + + /* u3a_lush(): leak push. + */ + c3_w + u3a_lush(c3_w lab_w); + + /* u3a_lop(): leak pop. + */ + void + u3a_lop(c3_w lab_w); + + /* u3a_print_time(): print microsecond time. + */ + void + u3a_print_time(c3_c* str_c, c3_c* cap_c, c3_d mic_d); + + /* u3a_print_memory(): print memory amount. 
+ */ + void + u3a_print_memory(FILE* fil_u, c3_c* cap_c, c3_w wor_w); + + /* u3a_prof(): mark/measure/print memory profile. RETAIN. + */ + c3_w + u3a_prof(FILE* fil_u, c3_w den_w, u3_noun mas); + + /* u3a_maid(): maybe print memory. + */ + c3_w + u3a_maid(FILE* fil_u, c3_c* cap_c, c3_w wor_w); + + /* u3a_deadbeef(): write 0xdeadbeef from hat to cap. + */ + void + u3a_deadbeef(void); + + /* u3a_walk_fore(): preorder traversal, visits ever limb of a noun. + ** + ** cells are visited *before* their heads and tails + ** and can shortcircuit traversal by returning [c3n] + */ + void + u3a_walk_fore(u3_noun a, + void* ptr_v, + void (*pat_f)(u3_atom, void*), + c3_o (*cel_f)(u3_noun, void*)); + + /* u3a_string(): `a` as an on-loom c-string. + */ + c3_c* + u3a_string(u3_atom a); + + /* u3a_loom_sane(): sanity checks the state of the loom for obvious corruption + */ + void + u3a_loom_sane(); + +#endif /* ifndef U3_ALLOCATE_H */ diff --git a/include/error.h b/include/error.h new file mode 100644 index 0000000..57932bf --- /dev/null +++ b/include/error.h @@ -0,0 +1,38 @@ +/// @file + +#ifndef U3_ERROR_H +#define U3_ERROR_H + +#include "manage.h" + +/* Assert. Good to capture. + + TODO: determine which u3_assert calls can rather call c3_dessert, i.e. in + public releases, which calls to u3_assert should abort and which should + no-op? If the latter, is the assert useful inter-development to validate + conditions we might accidentally break or not useful at all? 
+*/ + +#if defined(ASAN_ENABLED) && defined(__clang__) +# define u3_assert(x) \ + do { \ + if (!(x)) { \ + u3m_bail(c3__oops); \ + abort(); \ + } \ + } while(0) +#else +# define u3_assert(x) \ + do { \ + if (!(x)) { \ + fflush(stderr); \ + fprintf(stderr, "\rAssertion '%s' " \ + "failed in %s:%d\r\n", \ + #x, __FILE__, __LINE__); \ + u3m_bail(c3__oops); \ + abort(); \ + } \ + } while(0) +#endif /* if defined(ASAN_ENABLED) && defined(__clang__) */ + +#endif /* ifndef U3_ERROR_H */ diff --git a/include/events.h b/include/events.h new file mode 100644 index 0000000..7442c9d --- /dev/null +++ b/include/events.h @@ -0,0 +1,128 @@ +/// @file + +#ifndef U3_EVENTS_H +#define U3_EVENTS_H + +#include "c3.h" +#include "allocate.h" +#include "version.h" + + /** Data structures. + **/ + /* u3e_line: control line. + */ + typedef struct _u3e_line { + c3_w pag_w; + c3_w mug_w; + } u3e_line; + + /* u3e_control: memory change, control file. + */ + typedef struct _u3e_control { + u3e_version ver_w; // version number + c3_w nor_w; // new page count north + c3_w sou_w; // new page count south + c3_w pgs_w; // number of changed pages + u3e_line mem_u[0]; // per page + } u3e_control; + + /* u3_cs_patch: memory change, top level. + */ + typedef struct _u3_cs_patch { + c3_i ctl_i; + c3_i mem_i; + u3e_control* con_u; + } u3_ce_patch; + + /* u3e_image: memory segment, open file. + */ + typedef struct _u3e_image { + c3_c* nam_c; // segment name + c3_i fid_i; // open file, or 0 + c3_w pgs_w; // length in pages + } u3e_image; + + /* u3e_pool: entire memory system. + */ + typedef struct _u3e_pool { + c3_c* dir_c; // path to + c3_i eph_i; // ephemeral file descriptor + c3_w dit_w[u3a_pages >> 5]; // touched since last save + c3_w pag_w; // number of pages (<= u3a_pages) + c3_w gar_w; // guard page + u3e_image nor_u; // north segment + u3e_image sou_u; // south segment + } u3e_pool; + + /* u3e_flaw: loom fault result. 
+ */ + typedef enum { + u3e_flaw_sham = 0, // bogus state + u3e_flaw_base = 1, // vm fail (mprotect) + u3e_flaw_meme = 2, // bail:meme + u3e_flaw_good = 3 // handled + } u3e_flaw; + + /** Globals. + **/ + /// Snapshotting system. + extern u3e_pool u3e_Pool; +# define u3P u3e_Pool + + /** Constants. + **/ + + /** Functions. + **/ + /* u3e_backup(): copy the snapshot from [pux_c] to [pax_c], + * overwriting optional. + */ + c3_o + u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o); + + /* u3e_fault(): handle a memory fault. + */ + u3e_flaw + u3e_fault(u3_post low_p, u3_post hig_p, u3_post off_p); + + /* u3e_save(): update the checkpoint. + */ + void + u3e_save(u3_post low_p, u3_post hig_p); + + /* u3e_toss(): discard ephemeral pages. + */ + void + u3e_toss(u3_post low_p, u3_post hig_p); + + /* u3e_live(): start the persistence system. Return c3y if no image. + */ + c3_o + u3e_live(c3_o nuu_o, c3_c* dir_c); + + /* u3e_stop(): gracefully stop the persistence system. + */ + void + u3e_stop(void); + + /* u3e_yolo(): disable dirty page tracking, read/write whole loom. + */ + c3_o + u3e_yolo(void); + + /* u3e_foul(): dirty all the pages of the loom. + */ + void + u3e_foul(void); + + /* u3e_init(): initialize guard page tracking. + */ + void + u3e_init(void); + + /* u3e_ward(): reposition guard page if needed. + */ + void + u3e_ward(u3_post low_p, u3_post hig_p); + +#endif /* ifndef U3_EVENTS_H */ diff --git a/include/hashtable.h b/include/hashtable.h new file mode 100644 index 0000000..69ba962 --- /dev/null +++ b/include/hashtable.h @@ -0,0 +1,192 @@ +#ifndef U3_HASHTABLE_H +#define U3_HASHTABLE_H + +#include "c3.h" +#include "types.h" + + /** Data structures. + **/ + /** Straightforward implementation of the classic Bagwell + *** HAMT (hash array mapped trie), using a mug hash. + *** + *** Because a mug is 31 bits, the root table has 64 slots. 
+ *** The 31 bits of a mug are divided into the first lookup, + *** which is 6 bits (corresponding to the 64 entries in the + *** root table), followed by 5 more branchings of 5 bits each, + *** corresponding to the 32-slot nodes for everything under + *** the root node. + *** + *** We store an extra "freshly warm" bit and use it for a simple + *** clock-algorithm reclamation policy. + **/ + /* u3h_slot: map slot. + ** + ** Either a key-value cell or a loom offset, decoded as a pointer + ** to a u3h_node, or a u3h_buck at the bottom. Matches the u3_noun + ** format - coordinate with allocate.h. The top two bits are: + ** + ** 00 - empty (in the root table only) + ** 01 - table (node or buck) + ** 10 - entry, stale + ** 11 - entry, fresh + */ + typedef c3_w u3h_slot; + + /* u3h_node: map node. + */ + typedef struct { + c3_w map_w; // bitmap for [sot_w] + u3h_slot sot_w[0]; // filled slots + } u3h_node; + + /* u3h_root: hash root table + */ + typedef struct { + c3_w max_w; // number of cache lines (0 for no trimming) + c3_w use_w; // number of lines currently filled + struct { + c3_w mug_w; // current hash + c3_w inx_w; // index into current hash bucket + c3_o buc_o; // XX remove + } arm_u; // clock arm + u3h_slot sot_w[64]; // slots + } u3h_root; + + /* u3h_buck: bottom bucket. + */ + typedef struct { + c3_w len_w; // length of [sot_w] + u3h_slot sot_w[0]; // filled slots + } u3h_buck; + + /** HAMT macros. + *** + *** Coordinate with u3_noun definition! 
+ **/ + /* u3h_slot_is_null(): yes iff slot is empty + ** u3h_slot_is_noun(): yes iff slot contains a key/value cell + ** u3h_slot_is_node(): yes iff slot contains a subtable/bucket + ** u3h_slot_is_warm(): yes iff fresh bit is set + ** u3h_slot_to_node(): slot to node pointer + ** u3h_node_to_slot(): node pointer to slot + ** u3h_slot_to_noun(): slot to cell + ** u3h_noun_to_slot(): cell to slot + ** u3h_noun_be_warm(): warm mutant + ** u3h_noun_be_cold(): cold mutant + */ +# define u3h_slot_is_null(sot) ((0 == ((sot) >> 30)) ? c3y : c3n) +# define u3h_slot_is_node(sot) ((1 == ((sot) >> 30)) ? c3y : c3n) +# define u3h_slot_is_noun(sot) ((1 == ((sot) >> 31)) ? c3y : c3n) +# define u3h_slot_is_warm(sot) (((sot) & 0x40000000) ? c3y : c3n) +# define u3h_slot_to_node(sot) (u3a_into(((sot) & 0x3fffffff) << u3a_vits)) +# define u3h_node_to_slot(ptr) ((u3a_outa((ptr)) >> u3a_vits) | 0x40000000) +# define u3h_noun_be_warm(sot) ((sot) | 0x40000000) +# define u3h_noun_be_cold(sot) ((sot) & ~0x40000000) +# define u3h_slot_to_noun(sot) (0x40000000 | (sot)) +# define u3h_noun_to_slot(som) (u3h_noun_be_warm(som)) + + /** Functions. + *** + *** Needs: delete and merge functions; clock reclamation function. + **/ + /* u3h_new_cache(): create hashtable with bounded size. + */ + u3p(u3h_root) + u3h_new_cache(c3_w clk_w); + + /* u3h_new(): create hashtable. + */ + u3p(u3h_root) + u3h_new(void); + + /* u3h_put(): insert in hashtable. + ** + ** `key` is RETAINED; `val` is transferred. + */ + void + u3h_put(u3p(u3h_root) har_p, u3_noun key, u3_noun val); + + /* u3h_uni(): unify hashtables, copying [rah_p] into [har_p] + */ + void + u3h_uni(u3p(u3h_root) har_p, u3p(u3h_root) rah_p); + + /* u3h_get(): read from hashtable. + ** + ** `key` is RETAINED; result is PRODUCED. + */ + u3_weak + u3h_get(u3p(u3h_root) har_p, u3_noun key); + + /* u3h_git(): read from hashtable, retaining result. + ** + ** `key` is RETAINED; result is RETAINED. 
+ */ + u3_weak + u3h_git(u3p(u3h_root) har_p, u3_noun key); + + /* u3h_trim_to(): trim to n key-value pairs + */ + void + u3h_trim_to(u3p(u3h_root) har_p, c3_w n_w); + + /* u3h_free(): free hashtable. + */ + void + u3h_free(u3p(u3h_root) har_p); + + /* u3h_mark(): mark hashtable for gc. + */ + c3_w + u3h_mark(u3p(u3h_root) har_p); + + /* u3h_rewrite(): rewrite hashtable for compaction. + */ + void + u3h_rewrite(u3p(u3h_root) har_p); + + /* u3h_count(): count hashtable for gc. + */ + c3_w + u3h_count(u3p(u3h_root) har_p); + + /* u3h_discount(): discount hashtable for gc. + */ + c3_w + u3h_discount(u3p(u3h_root) har_p); + + /* u3h_walk_with(): traverse hashtable with key, value fn and data + * argument; RETAINS. + */ + void + u3h_walk_with(u3p(u3h_root) har_p, + void (*fun_f)(u3_noun, void*), + void* wit); + + /* u3h_walk(): u3h_walk_with, but with no data argument + */ + void + u3h_walk(u3p(u3h_root) har_p, void (*fun_f)(u3_noun)); + + /* u3h_take_with(): gain hashtable, copying junior keys + ** and calling [fun_f] on values + */ + u3p(u3h_root) + u3h_take_with(u3p(u3h_root) har_p, u3_funk fun_f); + + /* u3h_take(): gain hashtable, copying junior nouns + */ + u3p(u3h_root) + u3h_take(u3p(u3h_root) har_p); + + /* u3h_take_uni(): take entries from [src_p], put into [dst_p]. + */ + void + u3h_take_uni(u3p(u3h_root) dst_p, u3p(u3h_root) src_p); + + /* u3h_wyt(): number of entries + */ + c3_w + u3h_wyt(u3p(u3h_root) har_p); + +#endif /* ifndef U3_HASHTABLE_H */ diff --git a/include/imprison.h b/include/imprison.h new file mode 100644 index 0000000..be8b604 --- /dev/null +++ b/include/imprison.h @@ -0,0 +1,170 @@ +/// @file + +#ifndef U3_IMPRISON_H +#define U3_IMPRISON_H + +#include "allocate.h" +#include "c3.h" +#include "gmp.h" +#include "types.h" + + /** Structures. + **/ + /* u3i_slab: atom builder. 
+ */ + typedef struct _u3i_slab { + struct { // internals + u3a_atom* _vat_u; // heap atom (nullable) + c3_w _sat_w; // static storage + } _; // + union { // + c3_y* buf_y; // bytes + c3_w* buf_w; // words + }; // + c3_w len_w; // word length + } u3i_slab; + + /* staged atom-building api + */ + /* u3i_slab_init(): configure bloq-length slab, zero-initialize. + */ + void + u3i_slab_init(u3i_slab* sab_u, c3_g met_g, c3_d len_d); + + /* u3i_slab_bare(): configure bloq-length slab, uninitialized. + */ + void + u3i_slab_bare(u3i_slab* sab_u, c3_g met_g, c3_d len_d); + + /* u3i_slab_from(): configure bloq-length slab, initialize with [a]. + */ + void + u3i_slab_from(u3i_slab* sab_u, u3_atom a, c3_g met_g, c3_d len_d); + + /* u3i_slab_grow(): resize slab, zero-initializing new space. + */ + void + u3i_slab_grow(u3i_slab* sab_u, c3_g met_g, c3_d len_d); + + /* u3i_slab_free(): dispose memory backing slab. + */ + void + u3i_slab_free(u3i_slab* sab_u); + + /* u3i_slab_mint(): produce atom from slab, trimming. + */ + u3_atom + u3i_slab_mint(u3i_slab* sab_u); + + /* u3i_slab_moot(): produce atom from slab, no trimming. + */ + u3_atom + u3i_slab_moot(u3i_slab* sab_u); + + /* u3i_slab_mint_bytes(): produce atom from byte-slab, trimming. + ** XX assumes little-endian, implement swap to support big-endian + */ +# define u3i_slab_mint_bytes u3i_slab_mint + + /* u3i_slab_moot_bytes(): produce atom from byte-slab, no trimming. + ** XX assumes little-endian, implement swap to support big-endian + */ +# define u3i_slab_moot_bytes u3i_slab_moot + + /* General constructors. + */ + /* u3i_word(): construct u3_atom from c3_w. + */ + u3_atom + u3i_word(c3_w dat_w); + + /* u3i_chub(): construct u3_atom from c3_d. + */ + u3_atom + u3i_chub(c3_d dat_d); + + /* u3i_bytes(): Copy [a] bytes from [b] to an LSB first atom. + */ + u3_atom + u3i_bytes(c3_w a_w, + const c3_y* b_y); + + /* u3i_words(): Copy [a] words from [b] into an atom. 
+ */ + u3_atom + u3i_words(c3_w a_w, + const c3_w* b_w); + + /* u3i_chubs(): Copy [a] chubs from [b] into an atom. + */ + u3_atom + u3i_chubs(c3_w a_w, + const c3_d* b_d); + + /* u3i_mp(): Copy the GMP integer [a] into an atom, and clear it. + */ + u3_atom + u3i_mp(mpz_t a_mp); + + /* u3i_vint(): increment [a]. + */ + u3_atom + u3i_vint(u3_noun a); + + /* u3i_cell(): Produce the cell `[a b]`. + */ + u3_noun + u3i_cell(u3_noun a, u3_noun b); +# define u3nc(a, b) u3i_cell(a, b) + + /* u3i_defcons(): allocate cell for deferred construction. + ** NB: [hed] and [tel] pointers MUST be filled. + */ + u3_cell + u3i_defcons(u3_noun** hed, u3_noun** tel); + + /* u3i_trel(): Produce the triple `[a b c]`. + */ + u3_noun + u3i_trel(u3_noun a, u3_noun b, u3_noun c); +# define u3nt(a, b, c) u3i_trel(a, b, c) + + /* u3i_qual(): Produce the cell `[a b c d]`. + */ + u3_noun + u3i_qual(u3_noun a, u3_noun b, u3_noun c, u3_noun d); +# define u3nq(a, b, c, d) u3i_qual(a, b, c, d) + + /* u3i_string(): Produce an LSB-first atom from the C string [a]. + */ + u3_atom + u3i_string(const c3_c* a_c); + + /* u3i_tape(): from a C string, to a list of bytes. + */ + u3_noun + u3i_tape(const c3_c* txt_c); + + /* u3i_list(): list from `u3_none`-terminated varargs. + */ + u3_noun + u3i_list(u3_weak som, ...); +# define u3nl u3i_list + + /* u3i_edit(): + ** + ** Mutate `big` at axis `axe` with new value `som` + ** `axe` is RETAINED. + */ + u3_noun + u3i_edit(u3_noun big, u3_noun axe, u3_noun som); + + /* u3i_molt(): + ** + ** Mutate `som` with a 0-terminated list of axis, noun pairs. + ** Axes must be cats (31 bit). + */ + u3_noun + u3i_molt(u3_noun som, ...); + +#endif /* ifndef U3_IMPRISON_H */ diff --git a/include/jets.h b/include/jets.h new file mode 100644 index 0000000..9723a0d --- /dev/null +++ b/include/jets.h @@ -0,0 +1,317 @@ +/// @file + +#ifndef U3_JETS_H +#define U3_JETS_H + +#include "allocate.h" +#include "c3.h" +#include "types.h" + + /** Noun semantics. 
+ **/ +#if 0 ++= location $: pattern=(each static dynamic) + name=term + hooks=(map term axis) + == ++= static (each payload=* parent=location) ++= dynamic [where=axis parent=location] +:: ++= registry [roots=(map * location) parents=(list parent)] ++= parent (pair axis (map location location)) +:: ++= activation $: hot-index=@ud + drivers=(map axis @ud) + label=path + jit=* :: FIXME: should probably be (map battery *) + :: since there can be multiple batteries per location + == ++= hot-info $: reg=registry + hot-index=@ud + drivers=(map axis @ud) + label=path + == ++= bash @ :: battery hash (sha-256 based) +:: ++= hot (map bash hot-info) ++= cold (map battery=^ (pair bash registry)) ++= warm (map location activation) +#endif + + /** Data structures. + *** + *** All of these are transient structures allocated with malloc. + **/ + /* u3j_harm: jet arm. + */ + typedef struct _u3j_harm { + c3_c* fcs_c; // `.axe` or name + u3_noun (*fun_f)(u3_noun); // compute or 0 / semitransfer + // c3_o (*val_f)(u3_noun); // validate or 0 / retain + c3_o ice; // perfect (don't test) + c3_o tot; // total (never punts) + c3_o liv; // live (enabled) + c3_l axe_l; // computed/discovered axis + struct _u3j_core* cop_u; // containing core + } u3j_harm; + + /* u3j_hood: hook description. + */ + typedef struct _u3j_hood { + c3_c* nam_c; // hook name + c3_l axe_l; // hook axis (XX: direct) + c3_o kic_o; // hook is kick (vs. fragment) + c3_l sax_l; // hook subject axis (XX: direct) + } u3j_hood; + + /* u3j_core: driver definition. 
+ */ + typedef struct _u3j_core { + c3_c* cos_c; // control string + c3_l axe_l; // axis to parent + struct _u3j_harm* arm_u; // blank-terminated static list + struct _u3j_core* dev_u; // blank-terminated static list + c3_c** bas_u; // blank-terminated static list + struct _u3j_hood* huc_u; // blank-terminated static list + struct _u3j_core* par_u; // dynamic parent pointer + c3_l jax_l; // index in global dashboard + } u3j_core; + + /* u3j_dash, u3_Dash, u3D: jet dashboard singleton + */ + typedef struct _u3j_dash { + u3j_core* dev_u; // null-terminated static list + c3_l len_l; // dynamic array length + c3_l all_l; // allocated length + u3j_core* ray_u; // dynamic array by axis + } u3j_dash; + + /* u3j_fist: a single step in a fine check. + */ + typedef struct { + u3_noun bat; // battery + u3_noun pax; // parent axis + } u3j_fist; + + /* u3j_fink: (fine check) enough data to verify a located core. + */ + typedef struct { + c3_w len_w; // number of fists + u3_noun sat; // static noun at end of check + u3j_fist fis_u[0]; // fists + } u3j_fink; + + /* u3j_rite: site of a %fast, used to skip re-mining. + */ + typedef struct { + c3_o own_o; // rite owns fink? + u3_weak clu; // cached product of clue formula + u3p(u3j_fink) fin_p; // fine check + } u3j_rite; + + /* u3j_site: site of a kick (nock 9), used to cache call target. + */ + struct _u3n_prog; + typedef struct { + u3p(struct _u3n_prog) pog_p; // program for formula + u3_noun axe; // axis + u3_weak bat; // battery (for verification) + u3_weak bas; // hash of battery (for hot find) + u3_weak loc; // location (for reaming) + c3_o jet_o; // have jet driver? + c3_o fon_o; // site owns fink? + u3_weak lab; // label (for tracing) + u3j_core* cop_u; // jet core + u3j_harm* ham_u; // jet arm + u3p(u3j_fink) fin_p; // fine check + } u3j_site; + + /* u3j_hank: cached hook information. + */ + typedef struct { + u3_weak hax; // axis of hooked inner core + u3j_site sit_u; // call-site data + } u3j_hank; + + /** Globals. 
+ **/ + /* u3_Dash: jet dashboard. + */ + extern u3j_dash u3j_Dash; +# define u3D u3j_Dash + + /** Functions. + **/ + /* u3j_boot(): initialize jet system. + */ + c3_w + u3j_boot(c3_o nuu_o); + + /* u3j_clear(): clear jet table to re-register. + */ + void + u3j_clear(void); + + /* u3j_cook(): + ** + ** Execute hook from core, call site cached by arbitrary c string + */ + u3_noun + u3j_cook(const c3_c* key_c, + u3_noun cor, + const c3_c* tam_c); + + /* u3j_hook(): + ** + ** Execute hook from core. + */ + u3_noun + u3j_hook(u3_noun cor, + const c3_c* tam_c); + + /* u3j_soft(): + ** + ** Execute hook from core, without jet. + */ + u3_noun + u3j_soft(u3_noun cor, + const c3_c* tam_c); + + /* u3j_kick(): try to kick by jet. If no kick, produce u3_none. + ** + ** `axe` is RETAINED by the caller; `cor` is RETAINED iff there + ** is no kick, TRANSFERRED if one. + */ + u3_weak + u3j_kick(u3_noun cor, u3_noun axe); + + /* u3j_kink(): kick either by jet or by nock. + */ + u3_noun + u3j_kink(u3_noun cor, + u3_noun axe); + + /* u3j_mine(): register core for jets. + */ + void + u3j_mine(u3_noun clu, + u3_noun cor); + + /* u3j_ream(): refresh after restoring from checkpoint. + */ + void + u3j_ream(void); + + /* u3j_stay(): extract cold state + */ + u3_noun + u3j_stay(void); + + /* u3j_load(): inject cold state + */ + void + u3j_load(u3_noun rel); + + /* u3j_reap(): promote jet state. + */ + void + u3j_reap(u3a_jets* jed_u); + + /* u3j_rite_mine(): mine cor with clu, using u3j_rite for caching + */ + void + u3j_rite_mine(u3j_rite* rit_u, u3_noun clu, u3_noun cor); + + /* u3j_rite_take(): copy junior rite references from src_u to dst_u. + */ + void + u3j_rite_take(u3j_rite* dst_u, u3j_rite* src_u); + + /* u3j_rite_merge(): copy rite references from src_u to dst_u, + ** losing old references + */ + void + u3j_rite_merge(u3j_rite* dst_u, u3j_rite* src_u); + + /* u3j_site_take(): copy junior site references. 
+ */ + void + u3j_site_take(u3j_site* dst_u, u3j_site* src_u); + + /* u3j_site_merge(): copy site references from src_u to dst_u, + ** losing old references + */ + void + u3j_site_merge(u3j_site* dst_u, u3j_site* src_u); + + /* u3j_site_ream(): refresh u3j_site after restoring from checkpoint + */ + void + u3j_site_ream(u3j_site* sit_u); + + /* u3j_site_kick(): kick a core with a u3j_site cache. + */ + u3_weak + u3j_site_kick(u3_noun cor, u3j_site* sit_u); + + /* u3j_gate_prep(): prepare a locally cached gate to call repeatedly. + */ + void + u3j_gate_prep(u3j_site* sit_u, u3_noun cor); + + /* u3j_gate_slam(): slam a site prepared by u3j_gate_prep() with sample. + */ + u3_noun + u3j_gate_slam(u3j_site* sit_u, u3_noun sam); + + /* u3j_gate_lose(): clean up site prepared by u3j_gate_prep(). + */ + void + u3j_gate_lose(u3j_site* sit_u); + + /* u3j_rite_mark(): mark u3j_rite for gc. + */ + c3_w + u3j_rite_mark(u3j_rite* rit_u); + + /* u3j_rite_lose(): lose references of u3j_rite (but do not free). + */ + void + u3j_rite_lose(u3j_rite* rit_u); + + /* u3j_site_lose(): lose references of u3j_site (but do not free). + */ + void + u3j_site_lose(u3j_site* sit_u); + + /* u3j_site_mark(): mark u3j_site for gc. + */ + c3_w + u3j_site_mark(u3j_site* sit_u); + + /* u3j_mark(): mark jet state for gc. + */ + c3_w + u3j_mark(FILE* fil_u); + + /* u3j_free(): free jet state. + */ + void + u3j_free(void); + + /* u3j_free_hank(): free an entry from the hank cache. + */ + void + u3j_free_hank(u3_noun kev); + + /* u3j_reclaim(): clear ad-hoc persistent caches to reclaim memory. + */ + void + u3j_reclaim(void); + + /* u3j_rewrite_compact(): rewrite jet state for compaction. 
+ */ + void + u3j_rewrite_compact(void); + +#endif /* ifndef U3_JETS_H */ diff --git a/include/log.h b/include/log.h new file mode 100644 index 0000000..4e94147 --- /dev/null +++ b/include/log.h @@ -0,0 +1,24 @@ +/// @file + +#ifndef U3_LOG_H +#define U3_LOG_H + +#include "types.h" + +/* u3l_log(): logs to stderr or redirects to configured function. +*/ + void + u3l_log(const char* format, ...) + __attribute__ ((format (printf, 1, 2))); + +/* u3l_punt(): conditionally logs a named punt + * (e.g. "mint-punt" for the `name` "mint") + * when `pro` is u3_none, and returns pro. + * For use when a jet driver declines to handle + * a core, when the user should be somehow notified + * (e.g. in a cryptographic jet). + */ + u3_weak + u3l_punt(const char* name, u3_weak pro); + +#endif /* ifndef U3_LOG_H */ diff --git a/include/manage.h b/include/manage.h new file mode 100644 index 0000000..14f9cc5 --- /dev/null +++ b/include/manage.h @@ -0,0 +1,201 @@ +/// @file + +#ifndef U3_MANAGE_H +#define U3_MANAGE_H + +#include "v1/manage.h" +#include "v2/manage.h" + +#include "c3.h" +#include "types.h" +#include "version.h" + + /** System management. + **/ + /* u3m_boot(): start the u3 system. return next event, starting from 1. + */ + c3_d + u3m_boot(c3_c* dir_c, size_t len_i); + + /* u3m_pier(): make a pier. + */ + c3_c* + u3m_pier(c3_c* dir_c); + + /* u3m_boot_lite(): start without checkpointing. + */ + c3_d + u3m_boot_lite(size_t len_i); + + /* u3m_stop(): graceful shutdown cleanup. */ + void + u3m_stop(void); + + /* u3m_bail(): bail out. Does not return. + ** + ** Bail motes: + ** + ** %exit :: semantic failure + ** %evil :: bad crypto + ** %intr :: interrupt + ** %fail :: execution failure + ** %foul :: assert failure + ** %need :: network block + ** %meme :: out of memory + ** %time :: timed out + ** %oops :: assertion failure + */ + c3_i + u3m_bail(c3_m how_m) __attribute__((noreturn)); + + /* u3m_fault(): handle a memory event with libsigsegv protocol. 
+ */ + c3_i + u3m_fault(void* adr_v, c3_i ser_i); + + /* u3m_foul(): dirty all pages and disable tracking. + */ + void + u3m_foul(void); + + /* u3m_backup(): copy snapshot to .urb/bhk (if it doesn't exist yet). + */ + c3_o + u3m_backup(c3_o); + + /* u3m_save(): update the checkpoint. + */ + void + u3m_save(void); + + /* u3m_toss(): discard ephemeral memory. + */ + void + u3m_toss(void); + + /* u3m_ward(): tend the guardpage. + */ + void + u3m_ward(void); + + /* u3m_init(): start the environment. + */ + void + u3m_init(size_t len_i); + + /* u3m_pave(): instantiate or activate image. + */ + void + u3m_pave(c3_o nuu_o); + + /* u3m_signal(): treat a nock-level exception as a signal interrupt. + */ + void + u3m_signal(u3_noun sig_l); + + /* u3m_file(): load file, as atom, or bail. + */ + u3_noun + u3m_file(c3_c* pas_c); + + /* u3m_error(): bail out with %exit, ct_pushing error. + */ + c3_i + u3m_error(c3_c* str_c); + + /* u3m_hate(): new, integrated leap mechanism (enter). + */ + void + u3m_hate(c3_w pad_w); + + /* u3m_love(): return product from leap. + */ + u3_noun + u3m_love(u3_noun pro); + + /* u3m_soft(): system soft wrapper. unifies unix and nock errors. + ** + ** Produces [%$ result] or [%error (list tank)]. + */ + u3_noun + u3m_soft(c3_w mil_w, u3_funk fun_f, u3_noun arg); + + /* u3m_soft_slam: top-level call. + */ + u3_noun + u3m_soft_slam(u3_noun gat, u3_noun sam); + + /* u3m_soft_nock: top-level nock. + */ + u3_noun + u3m_soft_nock(u3_noun bus, u3_noun fol); + + /* u3m_soft_sure(): top-level call assumed correct. + */ + u3_noun + u3m_soft_sure(u3_funk fun_f, u3_noun arg); + + /* u3m_soft_run(): descend into virtualization context. + */ + u3_noun + u3m_soft_run(u3_noun gul, + u3_funq fun_f, + u3_noun aga, + u3_noun agb); + + /* u3m_soft_esc(): namespace lookup to (unit ,*). + */ + u3_noun + u3m_soft_esc(u3_noun ref, u3_noun sam); + + /* u3m_mark(): mark all nouns in the road. 
+ */ + c3_w + u3m_mark(FILE* fil_u); + + /* u3m_grab(): garbage-collect the world, plus extra roots. + */ + void + u3m_grab(u3_noun som, ...); // terminate with u3_none + + /* u3m_water(): produce high and low watermarks. Asserts u3R == u3H. + */ + void + u3m_water(u3_post* low_p, u3_post* hig_p); + + /* u3m_pretty(): dumb prettyprint to string. RETAIN. + */ + c3_c* + u3m_pretty(u3_noun som); + + /* u3m_pretty_path(): prettyprint a path to string. RETAIN. + */ + c3_c* + u3m_pretty_path(u3_noun som); + + /* u3m_p(): dumb print with caption. RETAIN. + */ + void + u3m_p(const c3_c* cap_c, u3_noun som); + + /* u3m_tape(): dump a tape to stdout. + */ + void + u3m_tape(u3_noun tep); + + /* u3m_wall(): dump a wall to stdout. + */ + void + u3m_wall(u3_noun wol); + + /* u3m_reclaim: clear persistent caches to reclaim memory. + */ + void + u3m_reclaim(void); + + /* u3m_pack: compact (defragment) memory, returns u3a_open delta. + */ + c3_w + u3m_pack(void); + +#endif /* ifndef U3_MANAGE_H */ diff --git a/include/nock.h b/include/nock.h new file mode 100644 index 0000000..e680327 --- /dev/null +++ b/include/nock.h @@ -0,0 +1,144 @@ +/// @file + +#ifndef U3_NOCK_H +#define U3_NOCK_H + +#include + +#include "c3.h" +#include "jets.h" +#include "types.h" +#include "zave.h" + + /** Data structures. + *** + **/ + + /* u3n_memo: %memo hint space + */ + typedef struct { + c3_l sip_l; + u3_noun key; + u3z_cid cid; + } u3n_memo; + + /* u3n_prog: program compiled from nock + */ + typedef struct _u3n_prog { + struct { + c3_o own_o; // program owns ops_y? 
+ c3_w len_w; // length of bytecode (bytes) + c3_y* ops_y; // actual array of bytes + } byc_u; // bytecode + struct { + c3_w len_w; // number of literals + u3_noun* non; // array of literals + } lit_u; // literals + struct { + c3_w len_w; // number of memo slots + u3n_memo* sot_u; // array of memo slots + } mem_u; // memo slot data + struct { + c3_w len_w; // number of calls sites + u3j_site* sit_u; // array of sites + } cal_u; // call site data + struct { + c3_w len_w; // number of registration sites + u3j_rite* rit_u; // array of sites + } reg_u; // registration site data + } u3n_prog; + + /** Functions. + **/ + /* u3n_nock_on(): produce .*(bus fol). + */ + u3_noun + u3n_nock_on(u3_noun bus, u3_noun fol); + + /* u3n_find(): return prog for given formula, + * split by key (u3_nul for none). RETAIN. + */ + u3p(u3n_prog) + u3n_find(u3_noun key, u3_noun fol); + + /* u3n_burn(): execute u3n_prog with bus as subject. + */ + u3_noun + u3n_burn(u3p(u3n_prog) pog_p, u3_noun bus); + + /* u3n_slam_on(): produce (gat sam). + */ + u3_noun + u3n_slam_on(u3_noun gat, u3_noun sam); + + /* u3n_kick_on(): fire `gat` without changing the sample. + */ + u3_noun + u3n_kick_on(u3_noun gat); + + /* u3n_nock_in(): produce .*(bus fol), as ++toon, in namespace. + */ + u3_noun + u3n_nock_in(u3_noun fly, u3_noun bus, u3_noun fol); + + /* u3n_nock_it(): produce .*(bus fol), as ++toon, in namespace. + */ + u3_noun + u3n_nock_it(u3_noun sea, u3_noun bus, u3_noun fol); + + /* u3n_nock_et(): produce .*(bus fol), as ++toon, in namespace. + */ + u3_noun + u3n_nock_et(u3_noun gul, u3_noun bus, u3_noun fol); + + /* u3n_slam_in(): produce (gat sam), as ++toon, in namespace. + */ + u3_noun + u3n_slam_in(u3_noun fly, u3_noun gat, u3_noun sam); + + /* u3n_slam_it(): produce (gat sam), as ++toon, in namespace. + */ + u3_noun + u3n_slam_it(u3_noun sea, u3_noun gat, u3_noun sam); + + /* u3n_slam_et(): produce (gat sam), as ++toon, in namespace. 
+ */ + u3_noun + u3n_slam_it(u3_noun gul, u3_noun gat, u3_noun sam); + + /* u3n_nock_an(): as slam_in(), but with empty fly. + */ + u3_noun + u3n_nock_an(u3_noun bus, u3_noun fol); + + /* u3n_reap(): promote bytecode state. + */ + void + u3n_reap(u3p(u3h_root) har_p); + + /* u3n_mark(): mark bytecode cache. + */ + c3_w + u3n_mark(FILE* fil_u); + + /* u3n_reclaim(): clear ad-hoc persistent caches to reclaim memory. + */ + void + u3n_reclaim(void); + + /* u3n_rewrite_compact(): rewrite bytecode cache for compaction. + */ + void + u3n_rewrite_compact(); + + /* u3n_free(): free bytecode cache. + */ + void + u3n_free(void); + + /* u3n_ream(): refresh after restoring from checkpoint. + */ + void + u3n_ream(void); + +#endif /* ifndef U3_NOCK_H */ diff --git a/include/noun.h b/include/noun.h new file mode 100644 index 0000000..965330a --- /dev/null +++ b/include/noun.h @@ -0,0 +1,26 @@ +/// @file + +#ifndef U3_NOUN_H +#define U3_NOUN_H + +#include "allocate.h" +#include "hashtable.h" +#include "error.h" +#include "jets.h" +#include "jets/k.h" +#include "jets/q.h" +#include "manage.h" +#include "options.h" +#include "serial.h" +#include "types.h" +#include "vortex.h" +#include "zave.h" +#include "imprison.h" +#include "log.h" +#include "nock.h" +#include "retrieve.h" +#include "trace.h" +#include "urth.h" +#include "xtract.h" + +#endif /* ifndef U3_NOUN_H */ diff --git a/include/options.h b/include/options.h new file mode 100644 index 0000000..29ca6ce --- /dev/null +++ b/include/options.h @@ -0,0 +1,57 @@ +/// @file + +#ifndef U3_OPTIONS_H +#define U3_OPTIONS_H + +#include "c3.h" +#include "types.h" + + /** Data structures. + **/ + /* u3o_config: process / system configuration. 
+ */ + typedef struct _u3o_config { + u3_noun who; // single identity + c3_c* dir_c; // execution directory (pier) + c3_c* eph_c; // ephemeral file + c3_w wag_w; // flags (both ways) + size_t wor_i; // loom word-length (<= u3a_words) + c3_w tos_w; // loom toss skip-length + c3_w hap_w; // transient memoization cache size + c3_w per_w; // persistent memoization cache size + void (*stderr_log_f)(c3_c*); // errors from c code + void (*slog_f)(u3_noun); // function pointer for slog + void (*sign_hold_f)(void); // suspend system signal regime + void (*sign_move_f)(void); // restore system signal regime + } u3o_config; + + /* u3o_flag: process/system flags. + ** + ** _debug flags are set outside u3 and heard inside it. + ** _check flags are set inside u3 and heard outside it. + */ + enum u3o_flag { // execution flags + u3o_debug_ram = 1 << 0, // debug: gc + u3o_debug_cpu = 1 << 1, // debug: profile + u3o_check_corrupt = 1 << 2, // check: gc memory + u3o_check_fatal = 1 << 3, // check: unrecoverable + u3o_verbose = 1 << 4, // be remarkably wordy + u3o_dryrun = 1 << 5, // don't touch checkpoint + u3o_quiet = 1 << 6, // disable ~& + u3o_hashless = 1 << 7, // disable hashboard + u3o_trace = 1 << 8, // enables trace dumping + u3o_no_demand = 1 << 9, // disables demand paging + u3o_auto_meld = 1 << 10, // enables meld under pressure + u3o_soft_mugs = 1 << 11, // continue replay on mismatch + u3o_swap = 1 << 12, // enables ephemeral file + u3o_toss = 1 << 13 // reclaim often + }; + + /** Globals. + **/ + /* u3_Config / u3C: global memory control. 
+ */ + extern u3o_config u3o_Config; +# define u3C u3o_Config + +#endif /* ifndef U3_OPTIONS_H */ diff --git a/include/retrieve.h b/include/retrieve.h new file mode 100644 index 0000000..7423ad9 --- /dev/null +++ b/include/retrieve.h @@ -0,0 +1,538 @@ +/// @file + +#ifndef U3_RETRIEVE_H +#define U3_RETRIEVE_H + +#include "c3.h" +#include "allocate.h" +#include "error.h" +#include "gmp.h" +#include "types.h" + + /** u3r_*: read without ever crashing. + **/ + + /* u3r_cell(): factor (a) as a cell (b c). + */ + inline c3_o + u3r_cell(u3_noun a, u3_noun* b, u3_noun* c) + { + u3a_cell* cel_u; + + u3_assert(u3_none != a); + + if ( c3y == u3a_is_cell(a) ) { + cel_u = u3a_to_ptr(a); + if ( b ) *b = cel_u->hed; + if ( c ) *c = cel_u->tel; + return c3y; + } + else { + return c3n; + } + } + + /* u3r_trel(): factor (a) as a trel (b c d). + */ + inline c3_o + u3r_trel(u3_noun a, u3_noun *b, u3_noun *c, u3_noun *d) + { + u3_noun guf; + + if ( (c3y == u3r_cell(a, b, &guf)) && + (c3y == u3r_cell(guf, c, d)) ) { + return c3y; + } + else { + return c3n; + } + } + + /* u3r_qual(): factor (a) as a qual (b c d e). + */ + inline c3_o + u3r_qual(u3_noun a, + u3_noun* b, + u3_noun* c, + u3_noun* d, + u3_noun* e) + { + u3_noun guf; + + if ( (c3y == u3r_cell(a, b, &guf)) && + (c3y == u3r_trel(guf, c, d, e)) ) { + return c3y; + } + else return c3n; + } + + /* u3r_quil(): factor (a) as a quil (b c d e f). 
+ */ + inline c3_o + u3r_quil(u3_noun a, + u3_noun* b, + u3_noun* c, + u3_noun* d, + u3_noun* e, + u3_noun* f) + { + u3_noun guf; + + if ( (c3y == u3r_cell(a, b, &guf)) && + (c3y == u3r_qual(guf, c, d, e, f)) ) { + return c3y; + } + else return c3n; + } + + /* u3r_hext(): factor (a) as a hext (b c d e f g) + */ + inline c3_o + u3r_hext(u3_noun a, + u3_noun* b, + u3_noun* c, + u3_noun* d, + u3_noun* e, + u3_noun* f, + u3_noun* g) + { + u3_noun guf; + + if ( (c3y == u3r_cell(a, b, &guf)) && + (c3y == u3r_quil(guf, c, d, e, f, g)) ) { + return c3y; + } + else return c3n; + } + + /* u3r_at(): fragment `a` of `b`, or u3_none. + */ + u3_weak + u3r_at(u3_atom a, u3_weak b); + + /* u3r_mean(): + ** + ** Attempt to deconstruct `a` by axis, noun pairs; 0 terminates. + ** Axes must be sorted in tree order. + */ + c3_o + u3r_vmean(u3_noun a, va_list ap); + c3_o + u3r_mean(u3_noun a, ...); + + /* u3r_mug_both(): Join two mugs. + */ + c3_l + u3r_mug_both(c3_w lef_w, c3_w rit_w); + + /* u3r_mug_bytes(): Compute the mug of `buf`, `len`, LSW first. + */ + c3_l + u3r_mug_bytes(const c3_y *buf_y, + c3_w len_w); + + /* u3r_mug_c(): Compute the mug of `a`, LSB first. + */ + c3_l + u3r_mug_c(const c3_c *a_c); + + /* u3r_mug_cell(): Compute the mug of the cell `[hed tel]`. + */ + c3_l + u3r_mug_cell(u3_noun hed, + u3_noun tel); + + /* u3r_mug_chub(): Compute the mug of `num`, LSW first. + */ + c3_l + u3r_mug_chub(c3_d num_d); + + /* u3r_mug_words(): 31-bit nonzero MurmurHash3 on raw words. + */ + c3_l + u3r_mug_words(const c3_w* key_w, c3_w len_w); + + /* u3r_mug(): statefully mug a noun with 31-bit murmur3. + */ + c3_l + u3r_mug(u3_noun veb); + + /* u3r_fing(): + ** + ** Yes iff (a) and (b) are the same copy of the same noun. + ** (Ie, by pointer equality - u3r_sing with false negatives.) + */ + c3_o + u3r_fing(u3_noun a, + u3_noun b); + + /* u3r_fing_cell(): + ** + ** Yes iff `[p q]` and `b` are the same copy of the same noun. 
+ */ + c3_o + u3r_fing_cell(u3_noun p, + u3_noun q, + u3_noun b); + + /* u3r_fing_mixt(): + ** + ** Yes iff `[p q]` and `b` are the same copy of the same noun. + */ + c3_o + u3r_fing_mixt(const c3_c* p_c, + u3_noun q, + u3_noun b); + + /* u3r_fing_trel(): + ** + ** Yes iff `[p q r]` and `b` are the same copy of the same noun. + */ + c3_o + u3r_fing_trel(u3_noun p, + u3_noun q, + u3_noun r, + u3_noun b); + + /* u3r_fing_qual(): + ** + ** Yes iff `[p q r s]` and `b` are the same copy of the same noun. + */ + c3_o + u3r_fing_qual(u3_noun p, + u3_noun q, + u3_noun r, + u3_noun s, + u3_noun b); + + /* u3r_sing(): noun value equality. + ** + ** Unifies noun pointers on inner roads. + */ + c3_o + u3r_sing(u3_noun a, u3_noun b); + + /* u3r_sing_c(): cord/C-string value equivalence. + */ + c3_o + u3r_sing_c(const c3_c* a_c, + u3_noun b); + + /* u3r_sing_cell(): + ** + ** Yes iff `[p q]` and `b` are the same noun. + */ + c3_o + u3r_sing_cell(u3_noun p, + u3_noun q, + u3_noun b); + + /* u3r_sing_mixt(): + ** + ** Yes iff `[p q]` and `b` are the same noun. + */ + c3_o + u3r_sing_mixt(const c3_c* p_c, + u3_noun q, + u3_noun b); + + /* u3r_sing_trel(): + ** + ** Yes iff `[p q r]` and `b` are the same noun. + */ + c3_o + u3r_sing_trel(u3_noun p, + u3_noun q, + u3_noun r, + u3_noun b); + + /* u3r_sing_qual(): + ** + ** Yes iff `[p q r s]` and `b` are the same noun. + */ + c3_o + u3r_sing_qual(u3_noun p, + u3_noun q, + u3_noun r, + u3_noun s, + u3_noun b); + + /* u3r_nord(): + ** + ** Return 0, 1 or 2 if `a` is below, equal to, or above `b`. + */ + u3_atom + u3r_nord(u3_noun a, + u3_noun b); + + /* u3r_mold(): + ** + ** Divide `a` as a mold `[b.[p q] c]`. + */ + c3_o + u3r_mold(u3_noun a, + u3_noun* b, + u3_noun* c); + + /* u3r_bite(): retrieve/default $bloq and $step from $bite. + */ + c3_o + u3r_bite(u3_noun bite, u3_atom* bloq, u3_atom *step); + + /* u3r_p(): + ** + ** & [0] if [a] is of the form [b *c]. 
+ */ + c3_o + u3r_p(u3_noun a, + u3_noun b, + u3_noun* c); + + /* u3r_bush(): + ** + ** Factor [a] as a bush [b.[p q] c]. + */ + c3_o + u3r_bush(u3_noun a, + u3_noun* b, + u3_noun* c); + + /* u3r_pq(): + ** + ** & [0] if [a] is of the form [b *c d]. + */ + c3_o + u3r_pq(u3_noun a, + u3_noun b, + u3_noun* c, + u3_noun* d); + + /* u3r_pqr(): + ** + ** & [0] if [a] is of the form [b *c *d *e]. + */ + c3_o + u3r_pqr(u3_noun a, + u3_noun b, + u3_noun* c, + u3_noun* d, + u3_noun* e); + + /* u3r_pqrs(): + ** + ** & [0] if [a] is of the form [b *c *d *e *f]. + */ + c3_o + u3r_pqrs(u3_noun a, + u3_noun b, + u3_noun* c, + u3_noun* d, + u3_noun* e, + u3_noun* f); + + /* u3r_met(): + ** + ** Return the size of (b) in bits, rounded up to + ** (1 << a_y). + ** + ** For example, (a_y == 3) returns the size in bytes. + ** NB: (a_y) must be < 37. + */ + c3_w + u3r_met(c3_y a_y, + u3_atom b); + + /* u3r_bit(): + ** + ** Return bit (a_w) of (b). + */ + c3_b + u3r_bit(c3_w a_w, + u3_atom b); + + /* u3r_byte(): + ** + ** Return byte (a_w) of (b). + */ + c3_y + u3r_byte(c3_w a_w, + u3_atom b); + + /* u3r_bytes(): + ** + ** Copy bytes (a_w) through (a_w + b_w - 1) from (d) to (c). + */ + void + u3r_bytes(c3_w a_w, + c3_w b_w, + c3_y* c_y, + u3_atom d); + + /* u3r_bytes_fit(): + ** + ** Copy (len_w) bytes of (a) into (buf_y) if it fits, returning overage. + */ + c3_w + u3r_bytes_fit(c3_w len_w, + c3_y* buf_y, + u3_atom a); + + /* u3r_bytes_alloc(): + ** + ** Copy (len_w) bytes starting at (a_w) from (b) into a fresh allocation. + */ + c3_y* + u3r_bytes_alloc(c3_w a_w, + c3_w len_w, + u3_atom b); + + /* u3r_bytes_all(): + ** + ** Allocate and return a new byte array with all the bytes of (a), + ** storing the length in (len_w). 
+ */ + c3_y* + u3r_bytes_all(c3_w* len_w, + u3_atom a); + + /* u3r_chop_bits(): + ** + ** XOR `wid_d` bits from `src_w` at `bif_g` to `dst_w` at `bit_g` + ** + ** NB: [dst_w] must have space for [bit_g + wid_d] bits + */ + void + u3r_chop_bits(c3_g bif_g, + c3_d wid_d, + c3_g bit_g, + c3_w* dst_w, + const c3_w* src_w); + + /* u3r_chop_words(): + ** + ** Into the bloq space of `met`, from position `fum` for a + ** span of `wid`, to position `tou`, XOR from `src_w` + ** into `dst_w`. + ** + ** NB: [dst_w] must have space for [tou_w + wid_w] bloqs + */ + void + u3r_chop_words(c3_g met_g, + c3_w fum_w, + c3_w wid_w, + c3_w tou_w, + c3_w* dst_w, + c3_w len_w, + const c3_w* src_w); + + /* u3r_chop(): + ** + ** Into the bloq space of `met`, from position `fum` for a + ** span of `wid`, to position `tou`, XOR from atom `src` + ** into `dst_w`. + ** + ** NB: [dst_w] must have space for [tou_w + wid_w] bloqs + */ + void + u3r_chop(c3_g met_g, + c3_w fum_w, + c3_w wid_w, + c3_w tou_w, + c3_w* dst_w, + u3_atom src); + + /* u3r_mp(): + ** + ** Copy (b) into (a_mp). + */ + void + u3r_mp(mpz_t a_mp, + u3_atom b); + + /* u3r_short(): + ** + ** Return short (a_w) of (b). + */ + c3_s + u3r_short(c3_w a_w, + u3_atom b); + + /* u3r_word(): + ** + ** Return word (a_w) of (b). + */ + c3_w + u3r_word(c3_w a_w, + u3_atom b); + + + /* u3r_word_fit(): + ** + ** Fill (out_w) with (a) if it fits, returning success. + */ + c3_t + u3r_word_fit(c3_w* out_w, + u3_atom a); + + /* u3r_chub(): + ** + ** Return double-word (a_w) of (b). + */ + c3_d + u3r_chub(c3_w a_w, + u3_atom b); + + /* u3r_words(): + ** + ** Copy words (a_w) through (a_w + b_w - 1) from (d) to (c). + */ + void + u3r_words(c3_w a_w, + c3_w b_w, + c3_w* c_w, + u3_atom d); + + /* u3r_chubs(): + ** + ** Copy double-words (a_w) through (a_w + b_w - 1) from (d) to (c). + */ + void + u3r_chubs(c3_w a_w, + c3_w b_w, + c3_d* c_d, + u3_atom d); + + /* u3r_safe_byte(): validate and retrieve byte. 
+ */ + c3_o + u3r_safe_byte(u3_noun dat, c3_y* out_y); + + /* u3r_safe_word(): validate and retrieve word. + */ + c3_o + u3r_safe_word(u3_noun dat, c3_w* out_w); + + /* u3r_safe_chub(): validate and retrieve chub. + */ + c3_o + u3r_safe_chub(u3_noun dat, c3_d* out_d); + + /* u3r_string(): `a`, a text atom, as malloced C string. + */ + c3_c* + u3r_string(u3_atom a); + + /* u3r_tape(): `a`, a list of bytes, as malloced C string. + */ + c3_y* + u3r_tape(u3_noun a); + + /* u3r_skip(): + ** + ** Extract a constant from a formula, ignoring + ** safe/static hints, doing no computation. + */ + u3_weak + u3r_skip(u3_noun fol); + +#endif /* ifndef U3_RETRIEVE_H */ diff --git a/include/serial.h b/include/serial.h new file mode 100644 index 0000000..608d702 --- /dev/null +++ b/include/serial.h @@ -0,0 +1,137 @@ +/// @file + +#ifndef U3_SERIAL_H +#define U3_SERIAL_H + +#include "c3.h" +#include "imprison.h" +#include "types.h" + /* constants + */ + /* u3s_dit_y: digit table for @ux/@uv/@uw. + */ + extern const c3_y u3s_dit_y[64]; + + /* opaque handles + */ + /* u3_cue_xeno: handle for cue-ing with an off-loom dictionary. + */ + typedef struct _u3_cue_xeno u3_cue_xeno; + + /* Noun serialization. All noun arguments RETAINED. + */ + + /* u3s_jam_fib(): jam without atom allocation. + ** + ** returns atom-suitable words, and *bit_w will have + ** the length (in bits). return should be freed with u3a_wfree(). + */ + c3_w + u3s_jam_fib(u3i_slab* sab_u, u3_noun a); + + /* u3s_jam_xeno(): jam with off-loom buffer (re-)allocation. + */ + c3_d + u3s_jam_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y); + + /* u3s_cue(): cue [a] + */ + u3_noun + u3s_cue(u3_atom a); + + /* u3s_cue_xeno_init_with(): initialize a cue_xeno handle as specified. + */ + u3_cue_xeno* + u3s_cue_xeno_init_with(c3_d pre_d, c3_d siz_d); + + /* u3s_cue_xeno_init(): initialize a cue_xeno handle. + */ + u3_cue_xeno* + u3s_cue_xeno_init(void); + + /* u3s_cue_xeno_with(): cue on-loom, with off-loom dictionary in handle. 
+ */ + u3_weak + u3s_cue_xeno_with(u3_cue_xeno* sil_u, + c3_d len_d, + const c3_y* byt_y); + + /* u3s_cue_xeno_done(): dispose cue_xeno handle. + */ + void + u3s_cue_xeno_done(u3_cue_xeno* sil_u); + + /* u3s_cue_xeno(): cue on-loom, with off-loom dictionary. + */ + u3_weak + u3s_cue_xeno(c3_d len_d, + const c3_y* byt_y); + + /* u3s_cue_bytes(): cue bytes onto the loom. + */ + u3_noun + u3s_cue_bytes(c3_d len_d, const c3_y* byt_y); + + /* u3s_cue_atom(): cue atom. + */ + u3_noun + u3s_cue_atom(u3_atom a); + + /* u3s_etch_ud_smol(): c3_d to @ud + ** + ** =(26 (met 3 (scot %ud (dec (bex 64))))) + */ + c3_y* + u3s_etch_ud_smol(c3_d a_d, c3_y hun_y[26]); + + /* u3s_etch_ud(): atom to @ud. + */ + u3_atom + u3s_etch_ud(u3_atom a); + + /* u3s_etch_ud_c(): atom to @ud, as a malloc'd c string. + */ + size_t + u3s_etch_ud_c(u3_atom a, c3_c** out_c); + + /* u3s_etch_ux(): atom to @ux. + */ + u3_atom + u3s_etch_ux(u3_atom a); + + /* u3s_etch_ux_c(): atom to @ux, as a malloc'd c string. + */ + size_t + u3s_etch_ux_c(u3_atom a, c3_c** out_c); + + /* u3s_etch_uv(): atom to @uv. + */ + u3_atom + u3s_etch_uv(u3_atom a); + + /* u3s_etch_uv_c(): atom to @uv, as a malloc'd c string. + */ + size_t + u3s_etch_uv_c(u3_atom a, c3_c** out_c); + + /* u3s_etch_uw(): atom to @uw. + */ + u3_atom + u3s_etch_uw(u3_atom a); + + /* u3s_etch_uw_c(): atom to @uw, as a malloc'd c string. + */ + size_t + u3s_etch_uw_c(u3_atom a, c3_c** out_c); + + /* u3s_sift_ud_bytes: parse @ud. + */ + u3_weak + u3s_sift_ud_bytes(c3_w len_w, c3_y* byt_y); + + /* u3s_sift_ud: parse @ud. + */ + u3_weak + u3s_sift_ud(u3_atom a); + +#endif /* ifndef U3_SERIAL_H */ diff --git a/include/softblas.h b/include/softblas.h index 225204a..fc9333c 100644 --- a/include/softblas.h +++ b/include/softblas.h @@ -4,6 +4,15 @@ #include "softfloat.h" #include +/* If we are building for Urbit, we need to use the Urbit allocator. + You are responsible for linking SoftBLAS in that case. 
+*/ +#ifdef VERE +#include "noun.h" +#define malloc u3a_malloc +#define free u3a_free +#endif + // TYPES typedef struct { @@ -69,7 +78,7 @@ typedef struct { #define SB_COMPLEX16_I {SB_REAL16_ZERO, SB_REAL16_ONE} #define SB_COMPLEX32_I {SB_REAL32_ZERO, SB_REAL32_ONE} #define SB_COMPLEX64_I {SB_REAL64_ZERO, SB_REAL64_ONE} -// #define SB_COMPLEX128_I {SB_REAL128L_ZERO, SB_REAL128U_ONE} +#define SB_COMPLEX128_I {SB_REAL128L_ZERO, SB_REAL128U_ZERO, SB_REAL128L_ONE, SB_REAL128U_ONE} // SOFTFLOAT FUNCTIONS @@ -152,72 +161,72 @@ typedef struct { // Level 1 // Single-precision -float32_t sasum(uint64_t N, const float32_t *SX, uint64_t incX); -void saxpy(uint64_t N, float32_t SA, float32_t *SX, int64_t incX, float32_t *SY, int64_t incY); -void scopy(uint64_t N, const float32_t *SX, int64_t incX, float32_t *SY, int64_t incY); -float32_t sdot(const uint64_t N, const float32_t *X, const int64_t incX, const float32_t *Y, const int64_t incY); -float32_t snrm2(uint64_t N, const float32_t *X, uint64_t incX); -void srot(const uint64_t N, float32_t *X, const uint64_t incX, float32_t *Y, const uint64_t incY, const float32_t c, const float32_t s); -void srotg(float32_t *a, float32_t *b, float32_t *c, float32_t *s); -void srotm(const uint64_t N, float32_t *X, const uint64_t incX, float32_t *Y, const uint64_t incY, const float32_t *P); -void srotmg(float32_t *D1, float32_t *D2, float32_t *X1, const float32_t y1, float32_t *P); -void sscal(uint64_t N, float32_t SA, float32_t *SX, uint64_t incX); -void sswap(uint64_t N, float32_t *SX, uint64_t incX, float32_t *SY, uint64_t incY); +float32_t sasum(uint64_t N, const float32_t *SX, uint64_t incX, const uint_fast8_t rndMode); +void saxpy(uint64_t N, float32_t SA, float32_t *SX, int64_t incX, float32_t *SY, int64_t incY, const uint_fast8_t rndMode); +void scopy(uint64_t N, const float32_t *SX, int64_t incX, float32_t *SY, int64_t incY, const uint_fast8_t rndMode); +float32_t sdot(const uint64_t N, const float32_t *X, const int64_t incX, const 
float32_t *Y, const int64_t incY, const uint_fast8_t rndMode); +float32_t snrm2(uint64_t N, const float32_t *X, uint64_t incX, const uint_fast8_t rndMode); +void srot(const uint64_t N, float32_t *X, const uint64_t incX, float32_t *Y, const uint64_t incY, const float32_t c, const float32_t s, const uint_fast8_t rndMode); +void srotg(float32_t *a, float32_t *b, float32_t *c, float32_t *s, const uint_fast8_t rndMode); +void srotm(const uint64_t N, float32_t *X, const uint64_t incX, float32_t *Y, const uint64_t incY, const float32_t *P, const uint_fast8_t rndMode); +void srotmg(float32_t *D1, float32_t *D2, float32_t *X1, const float32_t y1, float32_t *P, const uint_fast8_t rndMode); +void sscal(uint64_t N, float32_t SA, float32_t *SX, uint64_t incX, const uint_fast8_t rndMode); +void sswap(uint64_t N, float32_t *SX, uint64_t incX, float32_t *SY, uint64_t incY, const uint_fast8_t rndMode); uint64_t isamax(uint64_t N, const float32_t *SX, uint64_t incX); // Double-precision -float64_t dasum(uint64_t N, const float64_t *DX, uint64_t incX); -void daxpy(uint64_t N, float64_t DA, float64_t *DX, int64_t incX, float64_t *DY, int64_t incY); -void dcopy(uint64_t N, const float64_t *DX, int64_t incX, float64_t *DY, int64_t incY); -float64_t ddot(const uint64_t N, const float64_t *X, const int64_t incX, const float64_t *Y, const int64_t incY); -float64_t dnrm2(uint64_t N, const float64_t *X, uint64_t incX); -void drot(const uint64_t N, float64_t *X, const uint64_t incX, float64_t *Y, const uint64_t incY, const float64_t c, const float64_t s); -void drotg(float64_t *a, float64_t *b, float64_t *c, float64_t *s); -void drotm(const uint64_t N, float64_t *X, const uint64_t incX, float64_t *Y, const uint64_t incY, const float64_t *P); -void drotmg(float64_t *D1, float64_t *D2, float64_t *X1, const float64_t y1, float64_t *P); -void dscal(uint64_t N, float64_t DA, float64_t *DX, uint64_t incX); -void dswap(uint64_t N, float64_t *DX, uint64_t incX, float64_t *DY, uint64_t incY); 
+float64_t dasum(uint64_t N, const float64_t *DX, uint64_t incX, const uint_fast8_t rndMode); +void daxpy(uint64_t N, float64_t DA, float64_t *DX, int64_t incX, float64_t *DY, int64_t incY, const uint_fast8_t rndMode); +void dcopy(uint64_t N, const float64_t *DX, int64_t incX, float64_t *DY, int64_t incY, const uint_fast8_t rndMode); +float64_t ddot(const uint64_t N, const float64_t *X, const int64_t incX, const float64_t *Y, const int64_t incY, const uint_fast8_t rndMode); +float64_t dnrm2(uint64_t N, const float64_t *X, uint64_t incX, const uint_fast8_t rndMode); +void drot(const uint64_t N, float64_t *X, const uint64_t incX, float64_t *Y, const uint64_t incY, const float64_t c, const float64_t s, const uint_fast8_t rndMode); +void drotg(float64_t *a, float64_t *b, float64_t *c, float64_t *s, const uint_fast8_t rndMode); +void drotm(const uint64_t N, float64_t *X, const uint64_t incX, float64_t *Y, const uint64_t incY, const float64_t *P, const uint_fast8_t rndMode); +void drotmg(float64_t *D1, float64_t *D2, float64_t *X1, const float64_t y1, float64_t *P, const uint_fast8_t rndMode); +void dscal(uint64_t N, float64_t DA, float64_t *DX, uint64_t incX, const uint_fast8_t rndMode); +void dswap(uint64_t N, float64_t *DX, uint64_t incX, float64_t *DY, uint64_t incY, const uint_fast8_t rndMode); uint64_t idamax(uint64_t N, const float64_t *DX, uint64_t incX); // Half-precision -float16_t hasum(uint64_t N, const float16_t *HX, uint64_t incX); -void haxpy(uint64_t N, float16_t HA, float16_t *HX, int64_t incX, float16_t *HY, int64_t incY); -void hcopy(uint64_t N, const float16_t *HX, int64_t incX, float16_t *HY, int64_t incY); -float16_t hdot(const uint64_t N, const float16_t *X, const int64_t incX, const float16_t *Y, const int64_t incY); -float16_t hnrm2(uint64_t N, const float16_t *X, uint64_t incX); -void hrot(const uint64_t N, float16_t *X, const uint64_t incX, float16_t *Y, const uint64_t incY, const float16_t c, const float16_t s); -void hrotg(float16_t *a, 
float16_t *b, float16_t *c, float16_t *s); -void hrotm(const uint64_t N, float16_t *X, const uint64_t incX, float16_t *Y, const uint64_t incY, const float16_t *P); -void hrotmg(float16_t *D1, float16_t *D2, float16_t *X1, const float16_t y1, float16_t *P); -void hscal(uint64_t N, float16_t HA, float16_t *HX, uint64_t incX); -void hswap(uint64_t N, float16_t *HX, uint64_t incX, float16_t *HY, uint64_t incY); +float16_t hasum(uint64_t N, const float16_t *HX, uint64_t incX, const uint_fast8_t rndMode); +void haxpy(uint64_t N, float16_t HA, float16_t *HX, int64_t incX, float16_t *HY, int64_t incY, const uint_fast8_t rndMode); +void hcopy(uint64_t N, const float16_t *HX, int64_t incX, float16_t *HY, int64_t incY, const uint_fast8_t rndMode); +float16_t hdot(const uint64_t N, const float16_t *X, const int64_t incX, const float16_t *Y, const int64_t incY, const uint_fast8_t rndMode); +float16_t hnrm2(uint64_t N, const float16_t *X, uint64_t incX, const uint_fast8_t rndMode); +void hrot(const uint64_t N, float16_t *X, const uint64_t incX, float16_t *Y, const uint64_t incY, const float16_t c, const float16_t s, const uint_fast8_t rndMode); +void hrotg(float16_t *a, float16_t *b, float16_t *c, float16_t *s, const uint_fast8_t rndMode); +void hrotm(const uint64_t N, float16_t *X, const uint64_t incX, float16_t *Y, const uint64_t incY, const float16_t *P, const uint_fast8_t rndMode); +void hrotmg(float16_t *D1, float16_t *D2, float16_t *X1, const float16_t y1, float16_t *P, const uint_fast8_t rndMode); +void hscal(uint64_t N, float16_t HA, float16_t *HX, uint64_t incX, const uint_fast8_t rndMode); +void hswap(uint64_t N, float16_t *HX, uint64_t incX, float16_t *HY, uint64_t incY, const uint_fast8_t rndMode); uint64_t ihamax(uint64_t N, const float16_t *HX, uint64_t incX); // Quad-precision -float128_t qasum(uint64_t N, const float128_t *QX, uint64_t incX); -void qaxpy(uint64_t N, float128_t QA, float128_t *QX, int64_t incX, float128_t *QY, int64_t incY); -void qcopy(uint64_t 
N, const float128_t *QX, int64_t incX, float128_t *QY, int64_t incY); -float128_t qdot(const uint64_t N, const float128_t *X, const int64_t incX, const float128_t *Y, const int64_t incY); -float128_t qnrm2(uint64_t N, const float128_t *X, uint64_t incX); -void qrot(const uint64_t N, float16_t *X, const uint64_t incX, float16_t *Y, const uint64_t incY, const float16_t c, const float16_t s); -void qrotg(float128_t *a, float128_t *b, float128_t *c, float128_t *s); -void qrotm(const uint64_t N, float128_t *X, const uint64_t incX, float128_t *Y, const uint64_t incY, const float128_t *P); -void qrotmg(float128_t *D1, float128_t *D2, float128_t *X1, const float128_t y1, float128_t *P); -void qscal(uint64_t N, float128_t QA, float128_t *QX, uint64_t incX); -void qswap(uint64_t N, float128_t *QX, uint64_t incX, float128_t *QY, uint64_t incY); +float128_t qasum(uint64_t N, const float128_t *QX, uint64_t incX, const uint_fast8_t rndMode); +void qaxpy(uint64_t N, float128_t QA, float128_t *QX, int64_t incX, float128_t *QY, int64_t incY, const uint_fast8_t rndMode); +void qcopy(uint64_t N, const float128_t *QX, int64_t incX, float128_t *QY, int64_t incY, const uint_fast8_t rndMode); +float128_t qdot(const uint64_t N, const float128_t *X, const int64_t incX, const float128_t *Y, const int64_t incY, const uint_fast8_t rndMode); +float128_t qnrm2(uint64_t N, const float128_t *X, uint64_t incX, const uint_fast8_t rndMode); +void qrot(const uint64_t N, float128_t *X, const uint64_t incX, float128_t *Y, const uint64_t incY, const float128_t c, const float128_t s, const uint_fast8_t rndMode); +void qrotg(float128_t *a, float128_t *b, float128_t *c, float128_t *s, const uint_fast8_t rndMode); +void qrotm(const uint64_t N, float128_t *X, const uint64_t incX, float128_t *Y, const uint64_t incY, const float128_t *P, const uint_fast8_t rndMode); +void qrotmg(float128_t *D1, float128_t *D2, float128_t *X1, const float128_t y1, float128_t *P, const uint_fast8_t rndMode); +void qscal(uint64_t N, 
float128_t QA, float128_t *QX, uint64_t incX, const uint_fast8_t rndMode); +void qswap(uint64_t N, float128_t *QX, uint64_t incX, float128_t *QY, uint64_t incY, const uint_fast8_t rndMode); uint64_t iqamax(uint64_t N, const float128_t *QX, uint64_t incX); // Complex single-precision -float32_t scasum(uint64_t N, const complex32_t *CX, int64_t incX); -void caxpy(uint64_t N, complex32_t CA, complex32_t *CX, int64_t incX, complex32_t *HY, int64_t incY); -void ccopy(uint64_t N, const complex32_t *CX, int64_t incX, complex32_t *CY, int64_t incY); -complex32_t cdotc(uint64_t N, const complex32_t *CX, int64_t incX, const complex32_t *CY, int64_t incY); -float32_t scnrm2(uint64_t N, const complex32_t *CX, uint64_t incX); -void csrot(const uint64_t N, complex32_t *CX, const uint64_t incX, complex32_t *CY, const uint64_t incY, const complex32_t c, const complex32_t s); +float32_t scasum(uint64_t N, const complex32_t *CX, int64_t incX, const uint_fast8_t rndMode); +void caxpy(uint64_t N, complex32_t CA, complex32_t *CX, int64_t incX, complex32_t *HY, int64_t incY, const uint_fast8_t rndMode); +void ccopy(uint64_t N, const complex32_t *CX, int64_t incX, complex32_t *CY, int64_t incY, const uint_fast8_t rndMode); +complex32_t cdotc(uint64_t N, const complex32_t *CX, int64_t incX, const complex32_t *CY, int64_t incY, const uint_fast8_t rndMode); +float32_t scnrm2(uint64_t N, const complex32_t *CX, uint64_t incX, const uint_fast8_t rndMode); +void csrot(const uint64_t N, complex32_t *CX, const uint64_t incX, complex32_t *CY, const uint64_t incY, const complex32_t c, const complex32_t s, const uint_fast8_t rndMode); -void cscal(uint64_t N, complex32_t CA, complex32_t *CX, uint64_t incX); -void cswap(uint64_t N, complex32_t *CX, uint64_t incX, complex32_t *CY, uint64_t incY); -uint64_t icamax(uint64_t N, const complex32_t *CX, uint64_t incX); +void cscal(uint64_t N, complex32_t CA, complex32_t *CX, uint64_t incX, const uint_fast8_t rndMode); +void cswap(uint64_t N, complex32_t *CX, 
uint64_t incX, complex32_t *CY, uint64_t incY, const uint_fast8_t rndMode); +uint64_t icamax(uint64_t N, const complex32_t *CX, uint64_t incX, const uint_fast8_t rndMode); // Complex double-precision @@ -227,17 +236,17 @@ uint64_t icamax(uint64_t N, const complex32_t *CX, uint64_t incX); // Level 2 -void sgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float32_t alpha, const float32_t *A, const uint64_t lda, const float32_t *X, const int64_t incX, const float32_t beta, float32_t *Y, const uint64_t incY); -void dgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float64_t alpha, const float64_t *A, const uint64_t lda, const float64_t *X, const int64_t incX, const float64_t beta, float64_t *Y, const uint64_t incY); -void hgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float16_t alpha, const float16_t *A, const uint64_t lda, const float16_t *X, const int64_t incX, const float16_t beta, float16_t *Y, const uint64_t incY); -void qgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float128_t alpha, const float128_t *A, const uint64_t lda, const float128_t *X, const int64_t incX, const float128_t beta, float128_t *Y, const uint64_t incY); +void sgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float32_t alpha, const float32_t *A, const uint64_t lda, const float32_t *X, const int64_t incX, const float32_t beta, float32_t *Y, const uint64_t incY, const uint_fast8_t rndMode); +void dgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float64_t alpha, const float64_t *A, const uint64_t lda, const float64_t *X, const int64_t incX, const float64_t beta, float64_t *Y, const uint64_t incY, const uint_fast8_t rndMode); +void hgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float16_t alpha, const float16_t *A, const uint64_t lda, const 
float16_t *X, const int64_t incX, const float16_t beta, float16_t *Y, const uint64_t incY, const uint_fast8_t rndMode); +void qgemv(const char Layout, const char Trans, const uint64_t M, const uint64_t N, const float128_t alpha, const float128_t *A, const uint64_t lda, const float128_t *X, const int64_t incX, const float128_t beta, float128_t *Y, const uint64_t incY, const uint_fast8_t rndMode); // Level 3 -void sgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float32_t alpha, const float32_t *A, const uint64_t lda, const float32_t *B, const uint64_t ldb, const float32_t beta, float32_t *C, const uint64_t ldc); -void dgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float64_t alpha, const float64_t *A, const uint64_t lda, const float64_t *B, const uint64_t ldb, const float64_t beta, float64_t *C, const uint64_t ldc); -void hgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float16_t alpha, const float16_t *A, const uint64_t lda, const float16_t *B, const uint64_t ldb, const float16_t beta, float16_t *C, const uint64_t ldc); -void qgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float128_t alpha, const float128_t *A, const uint64_t lda, const float128_t *B, const uint64_t ldb, const float128_t beta, float128_t *C, const uint64_t ldc); +void sgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float32_t alpha, const float32_t *A, const uint64_t lda, const float32_t *B, const uint64_t ldb, const float32_t beta, float32_t *C, const uint64_t ldc, const uint_fast8_t rndMode); +void dgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float64_t alpha, const float64_t *A, const uint64_t lda, const float64_t *B, const uint64_t ldb, const float64_t beta, 
float64_t *C, const uint64_t ldc, const uint_fast8_t rndMode); +void hgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float16_t alpha, const float16_t *A, const uint64_t lda, const float16_t *B, const uint64_t ldb, const float16_t beta, float16_t *C, const uint64_t ldc, const uint_fast8_t rndMode); +void qgemm(const char transA, const char transB, const uint64_t M, const uint64_t N, const uint64_t P, const float128_t alpha, const float128_t *A, const uint64_t lda, const float128_t *B, const uint64_t ldb, const float128_t beta, float128_t *C, const uint64_t ldc, const uint_fast8_t rndMode); // NAN unification @@ -380,4 +389,27 @@ static inline float128_t* qvec(float128_pair_t pairs[], uint64_t size) { return result; } + static inline void + _set_rounding(uint_fast8_t a) + { + switch ( a ) + { + default: + exit(1); + break; + case 'n': + softfloat_roundingMode = softfloat_round_near_even; + break; + case 'z': + softfloat_roundingMode = softfloat_round_minMag; + break; + case 'u': + softfloat_roundingMode = softfloat_round_max; + break; + case 'd': + softfloat_roundingMode = softfloat_round_min; + break; + } + } + #endif // SOFTBLAS_H diff --git a/include/trace.h b/include/trace.h new file mode 100644 index 0000000..695a5fe --- /dev/null +++ b/include/trace.h @@ -0,0 +1,187 @@ +/// @file + +#ifndef U3_TRACE_H +#define U3_TRACE_H + +#include "c3/c3.h" +#include "options.h" +#include "types.h" + +#ifdef U3_CPU_DEBUG +# include "options.h" +#endif + + /** Data structures. + **/ + /* u3t_trace: fast execution flags. + */ + typedef struct _u3t_trace { + c3_o noc_o; // now executing in nock interpreter + c3_o glu_o; // now executing in jet glue + c3_o mal_o; // now executing in allocator + c3_o far_o; // now executing in fragmentor + c3_o coy_o; // now executing in copy + c3_o euq_o; // now executing in equal + } u3t_trace; + + /** Macros. 
+ **/ +# ifdef U3_CPU_DEBUG +# define u3t_on(var) \ + (u3T.var = (u3C.wag_w & u3o_debug_cpu) \ + ? (c3n == u3T.var) ? c3y : (abort(), 0) \ + : u3T.var) +# else +# define u3t_on(var) +#endif + +# ifdef U3_CPU_DEBUG +# define u3t_off(var) \ + (u3T.var = (u3C.wag_w & u3o_debug_cpu) \ + ? (c3y == u3T.var) ? c3n : (abort(), 0) \ + : u3T.var) +# else +# define u3t_off(var) +#endif + + + /** Functions. + **/ + /* u3t_init(): initialize tracing layer. + */ + void + u3t_init(void); + + /// @return Number of entries written to the JSON trace file. + c3_w + u3t_trace_cnt(void); + + /// @return Number of times u3t_trace_close() has been called. + c3_w + u3t_file_cnt(void); + + /* u3t_push(): push on trace stack. + */ + void + u3t_push(u3_noun mon); + + /* u3t_mean(): push `[%mean roc]` on trace stack. + */ + void + u3t_mean(u3_noun roc); + + /* u3t_drop(): drop from meaning stack. + */ + void + u3t_drop(void); + + /* u3t_slog(): print directly. + */ + void + u3t_slog(u3_noun hod); + + /* u3t_heck(): profile point. + */ + void + u3t_heck(u3_atom cog); + + /* u3t_samp(): sample. + */ + void + u3t_samp(void); + + /* u3t_come(): push on profile stack; return yes if active push. RETAIN. + */ + c3_o + u3t_come(u3_noun bat); + + /* u3t_flee(): pop off profile stack. + */ + void + u3t_flee(void); + + /* u3t_trace_open(): opens the path for writing tracing information. + */ + void + u3t_trace_open(const c3_c *dir_c); + + /* u3t_trace_close(): closes the trace file. optional. + */ + void + u3t_trace_close(); + + /* u3t_trace_time(): returns current time since system epoc, + * whatever it is per system, in microseconds. + */ + c3_d + u3t_trace_time(); + + /* u3t_nock_trace_push(): pushes a frame onto the trace stack; + * return yes if active push. + */ + c3_o + u3t_nock_trace_push(u3_noun lab); + + /* u3t_nock_trace_pop(): pop off trace stack. + */ + void + u3t_nock_trace_pop(); + + /* u3t_event_trace(): record a lifecycle event. 
+ */ + void + u3t_event_trace(const c3_c* name, c3_c type); + + /* u3t_damp(): print and clear profile data. + */ + void + u3t_damp(FILE* fil_u); + + /* u3t_boff(): turn profile sampling off. + */ + void + u3t_boff(void); + + /* u3t_boot(): turn sampling on. + */ + void + u3t_boot(void); + + /* u3t_slog_cap(): slog a tank with a caption with + ** a given priority c3_l (assumed 0-3). + */ + void + u3t_slog_cap(c3_l pri_l, u3_noun cap, u3_noun tan); + + /* u3t_slog_trace(): given a c3_l priority pri and a raw stack tax + ** flop the order into start-to-end, render, and slog each item + ** until done. + */ + void + u3t_slog_trace(c3_l pri_l, u3_noun tax); + + /* u3t_slog_nara(): slog only the deepest road's trace with + ** c3_l priority pri + */ + void + u3t_slog_nara(c3_l pri_l); + + /* u3t_slog_hela(): join all roads' traces together into one tax + ** and pass it to slog_trace along with the given c3_l priority pri_l + */ + void + u3t_slog_hela(c3_l pri_l); + + /* u3t_etch_meme(): report memory stats at call time + */ + u3_noun + u3t_etch_meme(c3_l mod_l); + + /** Globals. + **/ + /// Tracing profiler. + extern u3t_trace u3t_Trace; +# define u3T u3t_Trace + + +#endif /* ifndef U3_TRACE_H */ diff --git a/include/types.h b/include/types.h new file mode 100644 index 0000000..c921900 --- /dev/null +++ b/include/types.h @@ -0,0 +1,64 @@ +/// @file +/// +/// Various noun types. + +#ifndef U3_TYPES_H +#define U3_TYPES_H + +#include "c3.h" + +/// Sentinel value for u3_noun types that aren't actually nouns. +#define u3_none (u3_noun)0xffffffff + +/// 0, or `~` in Hoon. +#define u3_nul 0 + +/// 0, or `%$` in Hoon. +#define u3_blip 0 + +/// Pointer offset into the loom. +/// +/// Declare variables of this type using u3p() to annotate the type of the +/// pointee. Ensure that variable names of this type end in `_p`. +typedef c3_w u3_post; +#define u3p(type) u3_post + +/// Tagged noun pointer. +/// +/// If bit 31 is 0, the noun is a direct 31-bit atom (also called a "cat"). 
+/// If bit 31 is 1 and bit 30 is 0, an indirect atom (also called a "pug"). +/// If bit 31 is 1 and bit 30 is 1, an indirect cell (also called a "pom"). +/// +/// Bits 0-29 are a word offset (i.e. u3_post) against the loom. +typedef c3_w u3_noun; + +/// Optional noun type. +/// +/// u3_weak is either a valid noun or u3_none. +typedef u3_noun u3_weak; + +/// Atom. +typedef u3_noun u3_atom; + +/// Term (Hoon aura @tas). +typedef u3_noun u3_term; + +/// Cell of the form `[a b]`. +typedef u3_noun u3_cell; + +/// Cell of the form `[a b c]`. +typedef u3_noun u3_trel; + +/// Cell of the form `[a b c d]`. +typedef u3_noun u3_qual; + +/// Cell of the form `[a b c d e]`. +typedef u3_noun u3_quin; + +/// Unary noun function. +typedef u3_noun (*u3_funk)(u3_noun); + +/// Binary noun function. +typedef u3_noun (*u3_funq)(u3_noun, u3_noun); + +#endif /* ifndef U3_TYPES_H */ diff --git a/include/urth.h b/include/urth.h new file mode 100644 index 0000000..0dc2aa8 --- /dev/null +++ b/include/urth.h @@ -0,0 +1,44 @@ +/// @file + +#ifndef U3_URTH_H +#define U3_URTH_H + +#include "c3.h" + + /** Functions. + **/ + /* u3u_meld(): globally deduplicate memory, returns u3a_open delta. + */ + c3_w + u3u_meld(void); + + /* u3u_cram(): globally deduplicate memory, and write a rock to disk. + */ + c3_o + u3u_cram(c3_c* dir_c, c3_d eve_d); + /* u3u_uncram(): restore persistent state from a rock. + */ + c3_o + u3u_uncram(c3_c* dir_c, c3_d eve_d); + + /* u3u_mmap_read(): open and mmap the file at [pat_c] for reading. + */ + c3_o + u3u_mmap_read(c3_c* cap_c, c3_c* pat_c, c3_d* out_d, c3_y** out_y); + + /* u3u_mmap(): open/create file-backed mmap at [pat_c] for read/write. + */ + c3_o + u3u_mmap(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y** out_y); + + /* u3u_mmap_save(): sync file-backed mmap. + */ + c3_o + u3u_mmap_save(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y* byt_y); + + /* u3u_munmap(): unmap the region at [byt_y]. 
+ */ + c3_o + u3u_munmap(c3_d len_d, c3_y* byt_y); + +#endif /* ifndef U3_URTH_H */ diff --git a/include/version.h b/include/version.h new file mode 100644 index 0000000..5cc6acb --- /dev/null +++ b/include/version.h @@ -0,0 +1,28 @@ +#ifndef U3_VERSION_H +#define U3_VERSION_H + +/* VORTEX + */ + +typedef c3_w u3v_version; + +#define U3V_VER1 1 +#define U3V_VER2 2 +#define U3V_VER3 3 +#define U3V_VERLAT U3V_VER3 + +/* EVENTS + */ + +typedef c3_w u3e_version; + +#define U3E_VER1 1 +#define U3E_VERLAT U3E_VER1 + +/* DISK FORMAT + */ + +#define U3D_VER1 1 +#define U3D_VERLAT U3D_VER1 + +#endif /* ifndef U3_VERSION_H */ diff --git a/include/vortex.h b/include/vortex.h new file mode 100644 index 0000000..2d202e8 --- /dev/null +++ b/include/vortex.h @@ -0,0 +1,146 @@ +/// @file + +#ifndef U3_VORTEX_H +#define U3_VORTEX_H + +#include "allocate.h" +#include "c3.h" +#include "imprison.h" +#include "version.h" + + /** Data structures. + **/ + /* u3v_arvo: modern arvo structure. + ** NB: packed to preserve word alignment given [eve_d] + */ + typedef struct __attribute__((__packed__)) _u3v_arvo { + c3_d eve_d; // event number + u3_noun yot; // cached gates + u3_noun now; // current time + u3_noun roc; // kernel core + } u3v_arvo; + + /* u3v_home: all internal (within image) state. + ** NB: version must be last for discriminability in north road + */ + typedef struct _u3v_home { + u3a_road rod_u; // storage state + u3v_arvo arv_u; // arvo state + u3v_version ver_w; // version number + } u3v_home; + + + /** Globals. + **/ + /// Arvo internal state. + extern u3v_home* u3v_Home; +# define u3H u3v_Home +# define u3A (&(u3v_Home->arv_u)) + + /** Functions. + **/ + /* u3v_life(): execute initial lifecycle, producing Arvo core. + */ + u3_noun + u3v_life(u3_noun eve); + + /* u3v_boot(): evaluate boot sequence, making a kernel + */ + c3_o + u3v_boot(u3_noun eve); + + /* u3v_boot_lite(): light bootstrap sequence, just making a kernel. 
+ */ + c3_o + u3v_boot_lite(u3_noun lit); + + /* u3v_wish_n(): text expression with cache. + */ + u3_noun + u3v_wish_n(const u3_noun txt); + + /* u3v_do(): use a kernel function. + */ + u3_noun + u3v_do(const c3_c* txt_c, u3_noun arg); +# define u3do(txt_c, a) u3v_do(txt_c, a) +# define u3dc(txt_c, a, b) u3v_do(txt_c, u3nc(a, b)) +# define u3dt(txt_c, a, b, c) u3v_do(txt_c, u3nt(a, b, c)) +# define u3dq(txt_c, a, b, c, d) u3v_do(txt_c, u3nq(a, b, c, d)) + + /* u3v_wish(): text expression with cache. + */ + u3_noun + u3v_wish(const c3_c* str_c); + + /* u3v_time(): set the reck time. + */ + void + u3v_time(u3_noun now); + + /* u3v_lily(): parse little atom. + */ + c3_o + u3v_lily(u3_noun fot, u3_noun txt, c3_l* tid_l); + + /* u3v_peek(): query the reck namespace. + */ + u3_noun + u3v_peek(u3_noun hap); + + /* u3v_soft_peek(): softly query the reck namespace. + */ + u3_noun + u3v_soft_peek(c3_w mil_w, u3_noun sam); + + /* u3v_poke(): insert and apply an input ovum (protected). + */ + u3_noun + u3v_poke(u3_noun ovo); + + /* u3v_poke_raw(): u3v_poke w/out u3A->now XX replace + */ + u3_noun + u3v_poke_raw(u3_noun sam); + + /* u3v_poke_sure(): inject an event, saving new state if successful. + */ + c3_o + u3v_poke_sure(c3_w mil_w, u3_noun eve, u3_noun* pro); + + /* u3v_tank(): dump single tank. + */ + void + u3v_tank(u3_noun blu, c3_l tab_l, u3_noun tac); + + /* u3v_punt(): dump tank list. + */ + void + u3v_punt(u3_noun blu, c3_l tab_l, u3_noun tac); + + /* u3v_sway(): print trace. + */ + void + u3v_sway(u3_noun blu, c3_l tab_l, u3_noun tax); + + /* u3v_plan(): queue ovum (external). + */ + void + u3v_plan(u3_noun pax, u3_noun fav); + + /* u3v_mark(): mark arvo kernel. + */ + c3_w + u3v_mark(FILE* fil_u); + + /* u3v_reclaim(): clear ad-hoc persistent caches to reclaim memory. + */ + void + u3v_reclaim(void); + + /* u3v_rewrite_compact(): rewrite arvo kernel for compaction. 
+ */ + void + u3v_rewrite_compact(); + +#endif /* ifndef U3_VORTEX_H */ diff --git a/include/xtract.h b/include/xtract.h new file mode 100644 index 0000000..343690a --- /dev/null +++ b/include/xtract.h @@ -0,0 +1,154 @@ +/// @file + +#ifndef U3_XTRACT_H +#define U3_XTRACT_H + +#include "c3.h" +#include "types.h" +#include "allocate.h" +#include "manage.h" + + /** Constants. + **/ + /* Conventional axes for gate call. + */ +# define u3x_pay 3 // payload +# define u3x_sam 6 // sample +# define u3x_sam_1 6 +# define u3x_sam_2 12 +# define u3x_sam_3 13 +# define u3x_sam_4 24 +# define u3x_sam_5 25 +# define u3x_sam_6 26 +# define u3x_sam_12 52 +# define u3x_sam_13 53 +# define u3x_sam_7 27 +# define u3x_sam_14 54 +# define u3x_sam_15 55 +# define u3x_sam_30 110 +# define u3x_sam_31 111 +# define u3x_sam_62 222 +# define u3x_sam_63 223 +# define u3x_con 7 // context +# define u3x_con_2 14 // context +# define u3x_con_3 15 // context +# define u3x_con_sam 30 // sample in gate context +# define u3x_con_sam_2 60 +# define u3x_con_sam_3 61 +# define u3x_bat 2 // battery + + + /** Macros. + **/ + /* Word axis macros. For 31-bit axes only. + */ + + /* u3x_at (u3at): fragment. + */ +# define u3x_at(a, b) u3x_good(u3r_at(a, b)) +# define u3at(a, b) u3x_at(a, b) + + /* u3x_bite(): xtract/default $bloq and $step from $bite. + */ +# define u3x_bite(a, b, c) \ + do { \ + if ( c3n == u3r_bite(a, b, c) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /* u3x_dep(): number of axis bits. + */ +# define u3x_dep(a_w) (c3_bits_word(a_w) - 1) + + /* u3x_cap(): root axis, 2 or 3. + */ +# define u3x_cap(a_w) ({ \ + u3_assert( 1 < a_w ); \ + (0x2 | (a_w >> (u3x_dep(a_w) - 1))); }) + + /* u3x_mas(): remainder after cap. + */ +# define u3x_mas(a_w) ({ \ + u3_assert( 1 < a_w ); \ + ( (a_w & ~(1 << u3x_dep(a_w))) | (1 << (u3x_dep(a_w) - 1)) ); }) + + /* u3x_peg(): connect two axes. 
+ */ +# define u3x_peg(a_w, b_w) \ + ( (a_w << u3x_dep(b_w)) | (b_w &~ (1 << u3x_dep(b_w))) ) + + /* u3x_cell(): divide `a` as a cell `[b c]`. + */ +# define u3x_cell(a, b, c) \ + do { \ + if ( c3n == u3r_cell(a, b, c) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /* u3x_trel(): divide `a` as a trel `[b c d]`, or bail. + */ +# define u3x_trel(a, b, c, d) \ + do { \ + if ( c3n == u3r_trel(a, b, c, d) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /* u3x_qual(): divide `a` as a quadruple `[b c d e]`. + */ +# define u3x_qual(a, b, c, d, e) \ + do { \ + if ( c3n == u3r_qual(a, b, c, d, e) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /* u3x_quil(): divide `a` as a quintuple `[b c d e f]`. + */ +# define u3x_quil(a, b, c, d, e, f) \ + do { \ + if ( c3n == u3r_quil(a, b, c, d, e, f) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /* u3x_hext(): divide `a` as a hextuple `[b c d e f g]`. + */ +# define u3x_hext(a, b, c, d, e, f, g) \ + do { \ + if ( c3n == u3r_hext(a, b, c, d, e, f, g) ) { \ + u3m_bail(c3__exit); \ + } \ + } while (0) + + /** Functions. + **/ + /** u3x_*: read, but bail with c3__exit on a crash. + **/ + /* u3x_atom(): atom or exit. + */ + inline u3_atom + u3x_atom(u3_noun a) + { + return ( c3y == u3a_is_cell(a) ) ? u3m_bail(c3__exit) : a; + } + + /* u3x_good(): test for u3_none. + */ + inline u3_noun + u3x_good(u3_weak som) + { + return ( u3_none == som ) ? u3m_bail(c3__exit) : som; + } + + /* u3x_mean(): + ** + ** Attempt to deconstruct `a` by axis, noun pairs; 0 terminates. + ** Axes must be sorted in tree order. + */ + void + u3x_mean(u3_noun a, ...); + +#endif /* ifndef U3_XTRACT_H */ diff --git a/include/zave.h b/include/zave.h new file mode 100644 index 0000000..6857427 --- /dev/null +++ b/include/zave.h @@ -0,0 +1,68 @@ +/// @file + +#ifndef U3_ZAVE_H +#define U3_ZAVE_H + +#include "c3.h" +#include "types.h" + + /** Memoization. 
+ *** + *** The memo cache is keyed by an arbitrary symbolic function + *** and a noun argument to that (logical) function. Functions + *** are predefined by C-level callers, but 0 means nock. + *** + *** Memo functions RETAIN keys and transfer values. + **/ + /* u3z_cid: cache id + */ + typedef enum { + u3z_memo_toss = 0, + u3z_memo_keep = 1, + // u3z_memo_ford = 2, + // u3z_memo_ames = 3, + // ... + } u3z_cid; + + /* u3z_key*(): construct a memo cache-key. Arguments retained. + */ + u3_noun u3z_key(c3_m, u3_noun); + u3_noun u3z_key_2(c3_m, u3_noun, u3_noun); + u3_noun u3z_key_3(c3_m, u3_noun, u3_noun, u3_noun); + u3_noun u3z_key_4(c3_m, u3_noun, u3_noun, u3_noun, u3_noun); + u3_noun u3z_key_5(c3_m, u3_noun, u3_noun, u3_noun, u3_noun, u3_noun); + + /* u3z_find*(): find in memo cache. Arguments retained + */ + u3_weak u3z_find(u3z_cid cid, u3_noun key); + u3_weak u3z_find_m(u3z_cid cid, c3_m fun_m, u3_noun one); + + /* u3z_save(): save in memo cache. TRANSFER key; RETAIN val; + */ + u3_noun u3z_save(u3z_cid cid, u3_noun key, u3_noun val); + + /* u3z_save_m(): save in memo cache. Arguments retained + */ + u3_noun u3z_save_m(u3z_cid cid, c3_m fun_m, u3_noun one, u3_noun val); + + /* u3z_uniq(): uniquify with memo cache. + */ + u3_noun + u3z_uniq(u3z_cid cid, u3_noun som); + + /* u3z_reap(): promote persistent memoization cache. + */ + void + u3z_reap(u3p(u3h_root) per_p); + + /* u3z_free(): free memoization cache. + */ + void + u3z_free(u3z_cid cid); + + /* u3z_ream(): refresh after restoring from checkpoint. 
+ */ + void + u3z_ream(u3z_cid cid); + +#endif /* ifndef U3_ZAVE_H */ diff --git a/src/blas/level1/daxpy.c b/src/blas/level1/daxpy.c index e56f744..8470636 100644 --- a/src/blas/level1/daxpy.c +++ b/src/blas/level1/daxpy.c @@ -1,6 +1,7 @@ #include "softblas.h" -void daxpy(uint64_t N, float64_t DA, float64_t *DX, int64_t incX, float64_t *DY, int64_t incY) { +void daxpy(uint64_t N, float64_t DA, float64_t *DX, int64_t incX, float64_t *DY, int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); int64_t iX = 0; int64_t iY = 0; if (incX < 0) iX = (-N + 1) * incX; diff --git a/src/blas/level1/dcopy.c b/src/blas/level1/dcopy.c index c9b2060..8f71fe6 100644 --- a/src/blas/level1/dcopy.c +++ b/src/blas/level1/dcopy.c @@ -1,6 +1,7 @@ #include "softblas.h" -void dcopy(uint64_t N, const float64_t *DX, int64_t incX, float64_t *DY, int64_t incY) { +void dcopy(uint64_t N, const float64_t *DX, int64_t incX, float64_t *DY, int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); int64_t iX = 0; int64_t iY = 0; if (incX < 0) iX = (-N + 1) * incX; diff --git a/src/blas/level1/ddot.c b/src/blas/level1/ddot.c index 75caf5c..92dfe0f 100644 --- a/src/blas/level1/ddot.c +++ b/src/blas/level1/ddot.c @@ -1,6 +1,7 @@ #include "softblas.h" -float64_t ddot(const uint64_t N, const float64_t *X, const int64_t incX, const float64_t *Y, const int64_t incY) { +float64_t ddot(const uint64_t N, const float64_t *X, const int64_t incX, const float64_t *Y, const int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float64_t ddot = { SB_REAL64_ZERO }; int64_t iX = 0; diff --git a/src/blas/level1/dnrm2.c b/src/blas/level1/dnrm2.c index b589304..b33da78 100644 --- a/src/blas/level1/dnrm2.c +++ b/src/blas/level1/dnrm2.c @@ -1,6 +1,7 @@ #include "softblas.h" -float64_t dnrm2(uint64_t N, const float64_t *X, uint64_t incX) { +float64_t dnrm2(uint64_t N, const float64_t *X, uint64_t incX, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float64_t norm = { 
SB_REAL64_ZERO }; if (N < 1 || incX < 1) { diff --git a/src/blas/level1/dscal.c b/src/blas/level1/dscal.c index 9f4b85a..2c1b2b4 100644 --- a/src/blas/level1/dscal.c +++ b/src/blas/level1/dscal.c @@ -1,6 +1,7 @@ #include "softblas.h" -void dscal(uint64_t N, float64_t DA, float64_t *DX, uint64_t incX) { +void dscal(uint64_t N, float64_t DA, float64_t *DX, uint64_t incX, const uint_fast8_t rndMode) { + _set_rounding(rndMode); uint64_t iX = 0; for (uint64_t i = 0; i < N; i++) { DX[iX] = f64_mul(DA, DX[iX]); diff --git a/src/blas/level1/dswap.c b/src/blas/level1/dswap.c index 5732b2f..4b7fba7 100644 --- a/src/blas/level1/dswap.c +++ b/src/blas/level1/dswap.c @@ -1,6 +1,7 @@ #include "softblas.h" -void dswap(uint64_t N, float64_t *DX, uint64_t incX, float64_t *DY, uint64_t incY) { +void dswap(uint64_t N, float64_t *DX, uint64_t incX, float64_t *DY, uint64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float64_t dtemp; if (N == 0) return; diff --git a/src/blas/level1/hasum.c b/src/blas/level1/hasum.c index 416aef3..87fb33b 100644 --- a/src/blas/level1/hasum.c +++ b/src/blas/level1/hasum.c @@ -1,6 +1,7 @@ #include "softblas.h" -float16_t hasum(uint64_t N, const float16_t *HX, uint64_t incX) { +float16_t hasum(uint64_t N, const float16_t *HX, uint64_t incX, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float16_t htemp = { SB_REAL16_ZERO }; for (uint64_t i = 0; i < N; i++) { diff --git a/src/blas/level1/haxpy.c b/src/blas/level1/haxpy.c index 148f4df..1f0832b 100644 --- a/src/blas/level1/haxpy.c +++ b/src/blas/level1/haxpy.c @@ -1,6 +1,7 @@ #include "softblas.h" -void haxpy(uint64_t N, float16_t HA, float16_t *HX, int64_t incX, float16_t *HY, int64_t incY) { +void haxpy(uint64_t N, float16_t HA, float16_t *HX, int64_t incX, float16_t *HY, int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); int64_t iX = 0; int64_t iY = 0; if (incX < 0) iX = (-N + 1) * incX; diff --git a/src/blas/level1/hcopy.c b/src/blas/level1/hcopy.c index 
553ec8e..e9a545f 100644 --- a/src/blas/level1/hcopy.c +++ b/src/blas/level1/hcopy.c @@ -1,6 +1,7 @@ #include "softblas.h" -void hcopy(uint64_t N, const float16_t *HX, int64_t incX, float16_t *HY, int64_t incY) { +void hcopy(uint64_t N, const float16_t *HX, int64_t incX, float16_t *HY, int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); int64_t iX = 0; int64_t iY = 0; if (incX < 0) iX = (-N + 1) * incX; diff --git a/src/blas/level1/hdot.c b/src/blas/level1/hdot.c index 5a31bab..126d7fd 100644 --- a/src/blas/level1/hdot.c +++ b/src/blas/level1/hdot.c @@ -1,6 +1,7 @@ #include "softblas.h" -float16_t hdot(const uint64_t N, const float16_t *X, const int64_t incX, const float16_t *Y, const int64_t incY) { +float16_t hdot(const uint64_t N, const float16_t *X, const int64_t incX, const float16_t *Y, const int64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float16_t hdot = { SB_REAL16_ZERO }; int64_t iX = 0; diff --git a/src/blas/level1/hnrm2.c b/src/blas/level1/hnrm2.c index 587f13a..2c64121 100644 --- a/src/blas/level1/hnrm2.c +++ b/src/blas/level1/hnrm2.c @@ -1,6 +1,7 @@ #include "softblas.h" -float16_t hnrm2(uint64_t N, const float16_t *X, uint64_t incX) { +float16_t hnrm2(uint64_t N, const float16_t *X, uint64_t incX, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float16_t norm = { SB_REAL16_ZERO }; if (N < 1 || incX < 1) { diff --git a/src/blas/level1/hrot.c b/src/blas/level1/hrot.c index 6813371..19456c5 100644 --- a/src/blas/level1/hrot.c +++ b/src/blas/level1/hrot.c @@ -1,4 +1,5 @@ -void hrot(const uint16_t N, float16_t *X, const uint16_t incX, float16_t *Y, const uint16_t incY, const float16_t c, const float16_t s) { +void hrot(const uint16_t N, float16_t *X, const uint16_t incX, float16_t *Y, const uint16_t incY, const float16_t c, const float16_t s, const uint_fast8_t rndMode) { + _set_rounding(rndMode); /* rounding mode supplied by the caller, as in the other level-1 routines */ float16_t tmp; if (c != SB_REAL32_ONE || s != SB_REAL32_ZERO) { diff --git a/src/blas/level1/hscal.c b/src/blas/level1/hscal.c index 3ece3d6..0e49d08 100644 --- a/src/blas/level1/hscal.c +++ b/src/blas/level1/hscal.c @@ -1,6 +1,7 @@ #include "softblas.h" -void hscal(uint64_t N, 
float16_t HA, float16_t *HX, uint64_t incX) { +void hscal(uint64_t N, float16_t HA, float16_t *HX, uint64_t incX, const uint_fast8_t rndMode) { + _set_rounding(rndMode); uint64_t iX = 0; for (uint64_t i = 0; i < N; i++) { HX[iX] = f16_mul(HA, HX[iX]); diff --git a/src/blas/level1/hswap.c b/src/blas/level1/hswap.c index 1ca5b88..025ce71 100644 --- a/src/blas/level1/hswap.c +++ b/src/blas/level1/hswap.c @@ -1,6 +1,7 @@ #include "softblas.h" -void hswap(uint64_t N, float16_t *HX, uint64_t incX, float16_t *HY, uint64_t incY) { +void hswap(uint64_t N, float16_t *HX, uint64_t incX, float16_t *HY, uint64_t incY, const uint_fast8_t rndMode) { + _set_rounding(rndMode); float16_t htemp; if (N == 0) return;