zig-utils/zig-regex
A modern, performant regular expression library for Zig.
A modern, high-performance regular expression library for Zig
Features • Installation • Quick Start • Documentation • Performance
zig-regex is a comprehensive regular expression engine for Zig featuring Thompson NFA construction with linear time complexity, extensive pattern support, and advanced optimization capabilities. Built with zero external dependencies and full memory control through Zig allocators.
| Feature | Syntax | Description |
|---|---|---|
| Literals | `abc`, `123` | Match exact characters and strings |
| Quantifiers | `*`, `+`, `?`, `{n}`, `{m,n}` | Greedy repetition |
| Lazy Quantifiers | `*?`, `+?`, `??`, `{n,m}?` | Non-greedy repetition |
| Possessive Quantifiers | `*+`, `++`, `?+`, `{n,m}+` | Atomic repetition (no backtracking) |
| Alternation | `a\|b\|c` | Match any alternative |
| Character Classes | `\d`, `\w`, `\s`, `\D`, `\W`, `\S` | Predefined character sets |
| Custom Classes | `[abc]`, `[a-z]`, `[^0-9]` | User-defined character sets |
| Unicode Classes | `\p{Letter}`, `\p{Number}`, `\X` | Unicode property support |
| Anchors | `^`, `$`, `\A`, `\z`, `\Z`, `\b`, `\B` | Position matching |
| Wildcards | `.` | Match any character |
| Groups | `(...)` | Capturing groups |
| Named Groups | `(?P<name>...)`, `(?<name>...)` | Named capturing groups |
| Non-capturing | `(?:...)` | Grouping without capture |
| Atomic Groups | `(?>...)` | Possessive grouping |
| Lookahead | `(?=...)`, `(?!...)` | Positive/negative lookahead |
| Lookbehind | `(?<=...)`, `(?<!...)` | Positive/negative lookbehind |
| Backreferences | `\1`, `\2`, `\k<name>` | Reference previous captures |
| Conditionals | `(?(condition)yes\|no)` | Conditional patterns |
| Escaping | `\\`, `\.`, `\n`, `\t`, etc. | Special character escaping |
compile, find, findAll, replace, replaceAll, split, iterator support
// build.zig.zon
.{
.name = "your-project",
.version = "0.1.0",
.dependencies = .{
.regex = .{
.url = "https://github.com/zig-utils/zig-regex/archive/main.tar.gz",
.hash = "...", // zig will provide this
},
},
}
// build.zig
const regex = b.dependency("regex", .{
.target = target,
.optimize = optimize,
});
exe.root_module.addImport("regex", regex.module("regex"));
git clone https://github.com/zig-utils/zig-regex.git
cd zig-regex
zig build
const std = @import("std");
const Regex = @import("regex").Regex;
/// Example entry point: compiles a three-digit/dash/four-digit pattern and
/// prints the first match found in a sample string. Any error from
/// `Regex.compile` or `find` is propagated to the caller via `try`.
pub fn main() !void {
// Create the allocator handed to the regex; the value returned by
// `gpa.deinit()` is explicitly discarded when `main` exits.
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
// Simple matching
const regex = try Regex.compile(allocator, "\\d{3}-\\d{4}");
// Release the compiled regex when `main` returns (runs before `gpa.deinit()`,
// since defers execute in reverse order).
defer regex.deinit();
// `find` yields an error union wrapping an optional: `try` unwraps the error,
// and the `|match|` capture runs the body only when a match is present.
if (try regex.find("Call me at 555-1234")) |match| {
std.debug.print("Found: {s}\n", .{match.slice}); // "555-1234"
}
}
const regex = try Regex.compile(allocator, "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})");
defer regex.deinit();
if (try regex.find("Date: 2024-03-15")) |match| {
const year = match.getCapture("year"); // "2024"
const month = match.getCapture("month"); // "03"
const day = match.getCapture("day"); // "15"
}
// Match any Unicode letter
const regex = try Regex.compile(allocator, "\\p{Letter}+");
// Match emoji
const emoji_regex = try Regex.compile(allocator, "\\p{Emoji}");
// Match grapheme clusters
const grapheme_regex = try Regex.compile(allocator, "\\X+");
// Prevent catastrophic backtracking
const regex = try Regex.compile(allocator, "(?>a+)b");
const poss_regex = try Regex.compile(allocator, "a++b");
// These won't match "aaaa" - no backtracking allowed
try std.testing.expect(try regex.find("aaaa") == null);
try std.testing.expect(try poss_regex.find("aaaa") == null);
// Match different patterns based on a condition
const regex = try Regex.compile(allocator, "(a)?(?(1)b|c)");
try std.testing.expectEqualStrings("ab", (try regex.find("ab")).?.slice);
try std.testing.expectEqualStrings("c", (try regex.find("c")).?.slice);
const Builder = @import("regex").Builder;
var builder = Builder.init(allocator);
defer builder.deinit();
const pattern = try builder
.startGroup()
.literal("https?://")
.oneOrMore(Builder.Patterns.word())
.literal(".")
.oneOrMore(Builder.Patterns.alpha())
.endGroup()
.build();
const regex = try Regex.compile(allocator, pattern);
defer regex.deinit();
const MacroRegistry = @import("regex").MacroRegistry;
const CommonMacros = @import("regex").CommonMacros;
var macros = MacroRegistry.init(allocator);
defer macros.deinit();
// Load common macros
try CommonMacros.loadInto(&macros);
// Define custom macros
try macros.define("phone", "\\d{3}-\\d{4}");
try macros.define("email", "${email_local}@${email_domain}");
// Expand macros in patterns
const pattern = try macros.expand("Contact: ${email} or ${phone}");
defer allocator.free(pattern);
zig-regex uses Thompson NFA construction to guarantee O(n×m) worst-case time complexity:
This prevents catastrophic backtracking that plagues traditional regex engines.
Pattern: /\d{3}-\d{4}/
Input: 1000-byte string
Time: ~850ns (M1 MacBook Pro)
Pattern: /(?:a|b)*c/
Input: 10000 'a's + 'c'
Time: Linear growth (no exponential backtracking)
Run benchmarks: zig build bench
# Build library
zig build
# Run tests
zig build test
# Run examples
zig build example
# Run benchmarks
zig build bench
# Generate documentation
zig build docs
See TODO.md for the complete development roadmap and planned features.
Contributions are welcome! Please:
MIT License - see LICENSE file for details.
Inspired by: