Description
shecc's memory usage grows rapidly with input size, reaching hundreds of megabytes for moderate programs. The arena allocators never free memory, and data structures are not optimized for space efficiency. This issue tracks efforts to reduce memory footprint while maintaining performance.
Memory Optimization Strategies
1. Arena Compaction
Implement aggressive memory reclamation between compilation phases:
```c
// Current: Never free memory
typedef struct arena {
    char *base;
    size_t size;
    size_t used;
    struct arena *next; // Chain of arenas
} arena_t;

// Proposed: Compactable arenas with metadata
typedef struct {
    void *start;
    size_t size;
    bool in_use;
    uint32_t phase; // Compilation phase when allocated
} allocation_t;

typedef struct {
    arena_t base;
    allocation_t *allocations;
    int alloc_count;
    uint32_t current_phase;
} compactable_arena_t;

// Forward declarations for helpers used below
double get_fragmentation(compactable_arena_t *arena);
void compact_arena(compactable_arena_t *arena);
void update_pointers(void *old_ptr, void *new_ptr);

void enter_compilation_phase(compactable_arena_t *arena, uint32_t phase)
{
    arena->current_phase = phase;

    // Free allocations from previous phases
    for (int i = 0; i < arena->alloc_count; i++) {
        if (arena->allocations[i].phase < phase - 1)
            arena->allocations[i].in_use = false;
    }

    // Compact if fragmentation > threshold
    if (get_fragmentation(arena) > 0.5)
        compact_arena(arena);
}

void compact_arena(compactable_arena_t *arena)
{
    // Move live allocations to the start of the arena
    char *write_ptr = arena->base.base;
    for (int i = 0; i < arena->alloc_count; i++) {
        if (!arena->allocations[i].in_use)
            continue;
        size_t size = arena->allocations[i].size;
        void *old_ptr = arena->allocations[i].start;
        if (old_ptr != write_ptr) {
            memmove(write_ptr, old_ptr, size);
            update_pointers(old_ptr, write_ptr);
        }
        arena->allocations[i].start = write_ptr;
        write_ptr += size;
    }
    arena->base.used = write_ptr - arena->base.base;
}
```
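The `get_fragmentation` helper referenced above does not exist in shecc today; a minimal sketch, assuming the `allocation_t` bookkeeping from the proposal, could report the fraction of dead bytes in the used region:

```c
// Hypothetical helper: fraction of the used region held by dead allocations.
// Assumes the allocation_t metadata from the sketch above.
double get_fragmentation(compactable_arena_t *arena)
{
    size_t dead_bytes = 0;
    for (int i = 0; i < arena->alloc_count; i++) {
        if (!arena->allocations[i].in_use)
            dead_bytes += arena->allocations[i].size;
    }
    if (arena->base.used == 0)
        return 0.0;
    return (double) dead_bytes / (double) arena->base.used;
}
```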
2. Instruction Size Reduction
Optimize IR instruction representation:
```c
// Current: 96 bytes per instruction
typedef struct insn {
    opcode_t op;           // 4 bytes
    var_t *rd, *rs1, *rs2; // 24 bytes (3 pointers)
    basic_block_t *bb;     // 8 bytes
    func_t *func;          // 8 bytes
    int imm;               // 4 bytes
    int line_no;           // 4 bytes
    char padding[44];      // Wasted space!
} insn_t; // Total: 96 bytes

// Proposed: 32 bytes per instruction
typedef struct {
    uint16_t op;      // 2 bytes
    uint16_t flags;   // 2 bytes (combine multiple fields)
    uint32_t rd_idx;  // 4 bytes (index instead of pointer)
    uint32_t rs1_idx; // 4 bytes
    uint32_t rs2_idx; // 4 bytes
    union {
        int32_t imm;     // 4 bytes
        uint32_t bb_idx; // 4 bytes (for branches)
    };
    uint32_t next_idx; // 4 bytes (index to next instruction)
    uint32_t line_no;  // 4 bytes
} compact_insn_t; // Total: 32 bytes (3x smaller!)

// Variable pool for index lookup
typedef struct {
    var_t *vars;
    int count;
    int capacity;
} var_pool_t;

var_t *get_var(var_pool_t *pool, uint32_t idx)
{
    if (idx == INVALID_IDX)
        return NULL;
    return &pool->vars[idx];
}
```
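For illustration, instructions could then live in a single flat array and be linked by `next_idx`. The `insn_pool_t` type, the `dump_block` walker, and the concrete `INVALID_IDX` value below are assumptions for this sketch, not existing shecc definitions:

```c
#define INVALID_IDX 0xFFFFFFFFu // assumed sentinel for "no operand / end of list"

// Hypothetical flat instruction pool addressed by index
typedef struct {
    compact_insn_t *insns;
    uint32_t count;
    uint32_t capacity;
} insn_pool_t;

// Walk a block by following next_idx instead of chasing pointers
void dump_block(insn_pool_t *pool, var_pool_t *vars, uint32_t first)
{
    for (uint32_t i = first; i != INVALID_IDX; i = pool->insns[i].next_idx) {
        compact_insn_t *insn = &pool->insns[i];
        var_t *rd = get_var(vars, insn->rd_idx); // index -> var_t lookup
        printf("insn %u: op=%u has_rd=%d\n", i, (unsigned) insn->op, rd != NULL);
    }
}
```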
3. Symbol Table Compression
Use string interning and compact representation:
```c
// Current: Duplicated strings, large structs
typedef struct symbol {
    char *name;          // 8 bytes + string memory
    type_t *type;        // 8 bytes
    int offset;          // 4 bytes
    int flags;           // 4 bytes
    struct symbol *next; // 8 bytes
    // ... more fields
} symbol_t; // 64+ bytes

// Proposed: Interned strings, packed structure
typedef struct {
    uint32_t name_offset; // 4 bytes (offset into string pool)
    uint16_t type_idx;    // 2 bytes (index into type table)
    uint16_t flags;       // 2 bytes
    int32_t offset;       // 4 bytes
} compact_symbol_t; // 12 bytes (5x smaller!)

typedef struct {
    char *buffer; // Single buffer for all strings
    size_t size;
    size_t used;
    hashmap_t *str_to_offset;
} string_pool_t;

uint32_t intern_string(string_pool_t *pool, const char *str)
{
    // Check whether the string is already interned
    uint32_t *offset = hashmap_get(pool->str_to_offset, str);
    if (offset)
        return *offset;

    // Add to the pool
    uint32_t new_offset = pool->used;
    size_t len = strlen(str) + 1;
    memcpy(pool->buffer + pool->used, str, len);
    pool->used += len;
    hashmap_put(pool->str_to_offset, str, new_offset);
    return new_offset;
}
```
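Reading a name back out of the pool is then a single offset addition. The `get_string` helper and `intern_example` usage below are hypothetical counterparts to `intern_string`:

```c
// Hypothetical reverse lookup: valid as long as the pool buffer is not
// reallocated while the returned pointer is held.
const char *get_string(string_pool_t *pool, uint32_t offset)
{
    return pool->buffer + offset;
}

// Usage: identical names collapse to one pool entry
bool intern_example(string_pool_t *pool)
{
    uint32_t a = intern_string(pool, "counter");
    uint32_t b = intern_string(pool, "counter");
    return a == b; // true: both symbols share the same "counter" offset
}
```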
4. Basic Block Optimization
Reduce basic block memory overhead:
```c
// Current: Linked lists with high overhead
typedef struct basic_block {
    insn_t *insns; // Linked list of instructions
    int insn_count;
    struct basic_block **predecessors;
    struct basic_block **successors;
    int pred_count, succ_count;
    // ... many fields
} basic_block_t; // 128+ bytes

// Proposed: Compact representation
typedef struct {
    uint32_t first_insn; // Index of first instruction
    uint16_t insn_count; // Number of instructions
    uint16_t flags;      // Various boolean flags
    uint32_t preds;      // Bitmap for small graphs
    uint32_t succs;      // or index into an edge list
} compact_bb_t; // 16 bytes (8x smaller!)
```
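With the bitmap encoding, CFG traversal becomes bit scanning over the block array. The sketch below assumes a function with at most 32 basic blocks; larger CFGs would fall back to the edge-list form mentioned in the comment:

```c
// Hypothetical traversal over a bitmap-encoded successor set.
// Assumes at most 32 basic blocks per function.
void visit_successors(compact_bb_t *blocks, uint32_t bb_idx,
                      void (*visit)(compact_bb_t *))
{
    uint32_t succs = blocks[bb_idx].succs;
    for (uint32_t i = 0; i < 32; i++) {
        if (succs & (1u << i))
            visit(&blocks[i]);
    }
}
```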
5. On-Demand Loading
Load and process functions incrementally:
```c
// Proposed: Lazy function processing
typedef struct {
    char *name;
    size_t source_offset; // Offset in source file
    size_t source_length;
    bool is_parsed;
    bool is_compiled;
    void *compiled_code;
} lazy_func_t;

void compile_function_on_demand(lazy_func_t *func)
{
    if (func->is_compiled)
        return;

    if (!func->is_parsed) {
        // Parse just this function
        parse_function_at(func->source_offset, func->source_length);
        func->is_parsed = true;
    }

    // Compile
    func->compiled_code = compile_single_function(func);
    func->is_compiled = true;

    // Free the parse tree if it is no longer needed
    if (!keep_parse_trees)
        free_function_parse_tree(func);
}
```
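A hedged example of how the code generator could drive this: when a call to a not-yet-compiled function is emitted, look it up and compile it first. The `find_lazy_func` and `emit_call` helpers below are assumptions for this sketch:

```c
// Hypothetical lookup over the lazy function table
lazy_func_t *find_lazy_func(lazy_func_t *funcs, int count, const char *name)
{
    for (int i = 0; i < count; i++) {
        if (!strcmp(funcs[i].name, name))
            return &funcs[i];
    }
    return NULL;
}

// Hypothetical trigger site in the code generator
void emit_call(lazy_func_t *funcs, int count, const char *callee)
{
    lazy_func_t *target = find_lazy_func(funcs, count, callee);
    if (target)
        compile_function_on_demand(target);
    // ... emit the call against target->compiled_code ...
}
```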
6. Memory Pooling by Type
Segregate allocations by lifetime:
```c
typedef enum {
    POOL_PERMANENT, // Symbol tables, types
    POOL_FUNCTION,  // Per-function data
    POOL_TEMPORARY, // Temporaries during optimization
    POOL_CODEGEN    // Code generation buffers
} pool_type_t;

typedef struct {
    arena_t pools[4];
    pool_type_t active_pool;
} memory_manager_t;

void *mm_alloc(memory_manager_t *mm, size_t size, pool_type_t type)
{
    return arena_alloc(&mm->pools[type], size);
}

void mm_free_pool(memory_manager_t *mm, pool_type_t type)
{
    if (type != POOL_PERMANENT)
        arena_reset(&mm->pools[type]);
}

// Usage during compilation
void compile_function(func_t *func)
{
    set_active_pool(POOL_FUNCTION);
    parse_function(func);

    set_active_pool(POOL_TEMPORARY);
    optimize_function(func);
    mm_free_pool(mm, POOL_TEMPORARY);

    set_active_pool(POOL_CODEGEN);
    generate_code(func);

    // Keep only the generated code
    mm_free_pool(mm, POOL_FUNCTION);
}
```
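The `arena_reset` call above only needs to rewind the bump pointers; a minimal sketch against the existing `arena_t` layout:

```c
// Minimal sketch: rewind every arena in the chain. Keeping the chained
// blocks (instead of freeing them) avoids re-allocating them for the
// next function.
void arena_reset(arena_t *arena)
{
    for (arena_t *a = arena; a; a = a->next)
        a->used = 0;
}
```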
Memory Benchmarks
Test Programs
```c
// tests/memory/

// Wide program (many symbols): 10,000 global variables
int g0, g1, g2, ..., g9999;

// Deep program (complex CFG): 1,000 nested if statements
if (a) { if (b) { if (c) { ... } } }

// Long program (many instructions): 100,000 simple statements
x = 1; y = 2; z = 3; ...
```
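These inputs are too large to check in verbatim, so they would likely be generated at test time. A hypothetical generator for the wide variant (the deep and long variants would follow the same pattern):

```c
// Hypothetical generator (not part of the repository): prints a C file
// with 10,000 global variables to stdout.
#include <stdio.h>

int main(void)
{
    const int n = 10000;
    for (int i = 0; i < n; i++)
        printf("int g%d;\n", i);
    printf("int main() { return g0; }\n");
    return 0;
}
```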
Measurement Tools
Memory Profiler
```c
// Add to src/debug.c
typedef struct {
    size_t current_usage;
    size_t peak_usage;
    size_t allocations;
    size_t deallocations;
    size_t bytes_allocated;
    size_t bytes_freed;
} memory_stats_t;

void print_memory_report(memory_stats_t *stats)
{
    printf("\n=== Memory Report ===\n");
    printf("Peak Usage:    %.2f MB\n", stats->peak_usage / 1048576.0);
    printf("Current Usage: %.2f MB\n", stats->current_usage / 1048576.0);
    printf("Allocations:   %zu\n", stats->allocations);
    printf("Deallocations: %zu\n", stats->deallocations);
    printf("Leaked:        %.2f MB\n",
           (stats->bytes_allocated - stats->bytes_freed) / 1048576.0);

    // Per-arena breakdown
    print_arena_stats();
}
```
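To populate these counters, the arena allocator needs a small hook on every allocation and release. A sketch, assuming a single global `memory_stats_t` instance:

```c
// Hypothetical hooks called from the arena allocator; assumes one
// global stats object.
static memory_stats_t g_mem_stats;

void memory_stats_on_alloc(size_t size)
{
    g_mem_stats.allocations++;
    g_mem_stats.bytes_allocated += size;
    g_mem_stats.current_usage += size;
    if (g_mem_stats.current_usage > g_mem_stats.peak_usage)
        g_mem_stats.peak_usage = g_mem_stats.current_usage;
}

void memory_stats_on_free(size_t size)
{
    g_mem_stats.deallocations++;
    g_mem_stats.bytes_freed += size;
    g_mem_stats.current_usage -= size;
}
```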
Valgrind Integration
```make
# Add to Makefile
memcheck: out/shecc
	valgrind --leak-check=full --show-leak-kinds=all \
	         --track-origins=yes --verbose \
	         ./out/shecc -o /dev/null tests/large.c

massif: out/shecc
	valgrind --tool=massif --pages-as-heap=yes \
	         --massif-out-file=massif.out \
	         ./out/shecc -o /dev/null tests/large.c
	ms_print massif.out
```
Testing Strategy
- Memory regression tests in CI
- Valgrind in test suite
- Large input tests (100K+ lines)
- Memory limit tests (compile under ulimit/setrlimit; see the sketch after this list)
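For the memory-limit tests, one option is a small harness that forks, applies `setrlimit(RLIMIT_AS)`, and execs the compiler, so an over-budget compile fails deterministically. This sketch assumes the same `./out/shecc -o /dev/null tests/large.c` invocation used in the Makefile targets above:

```c
// Hypothetical memory-limit harness: fail if shecc cannot compile the
// input within a 256 MB address-space limit.
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    pid_t pid = fork();
    if (pid == 0) {
        struct rlimit lim = { 256UL << 20, 256UL << 20 }; // 256 MB
        setrlimit(RLIMIT_AS, &lim);
        execl("./out/shecc", "shecc", "-o", "/dev/null",
              "tests/large.c", (char *) NULL);
        _exit(127); // exec failed
    }
    int status = 0;
    waitpid(pid, &status, 0);
    return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : 1;
}
```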
Success Criteria
- Meet memory targets for all benchmarks
- No memory leaks (Valgrind clean)
- Bootstrap with 50% less memory
- Compile 100K line programs in < 256MB
- No performance regression > 5%