#! /usr/bin/env dub
/++ dub.sdl:
configuration "release" {
targetType "executable"
}
configuration "unittest" {
targetType "library"
dependency "silly" version="~>1.1.1"
}
+/
version (unittest) {
    // No entry point is declared in unittest builds: the "unittest"
    // configuration in the dub.sdl header builds this file as a library.
} else {
    /// Script entry point for the "release" configuration.
    ///
    /// Re-invokes dub on this very file in test mode and forwards
    /// `--threads=1` to the test runner (everything after `--`).
    ///
    /// Returns: the exit status reported by `wait` for the child `dub test`
    /// process, so that a failing test run makes this script fail as well
    /// instead of always exiting with 0.
    int main() {
        import std.process : spawnProcess, wait;

        auto child = spawnProcess(["dub", "test", "--single", __FILE__, "--", "--threads=1"]);
        return child.wait;
    }
}
/// char[] -> ubyte[] :: std.string.representation
/// - infallible
/// - @safe
/// - non-allocating (the result starts at the same address as the input)
/// - preserves qualifiers; also works for wstring/dstring
@("std.string.representation")
nothrow @safe @nogc unittest {
    import std.string : representation;

    // UTF-8: immutable(char)[] is viewed as immutable(ubyte)[].
    string narrow = "hello world";
    immutable(ubyte)[] narrowUnits = narrow.representation;
    assert(cast(size_t) narrow.ptr == cast(size_t) narrowUnits.ptr);

    // UTF-32: immutable(dchar)[] is viewed as immutable(uint)[].
    dstring wide = "hello world"d;
    immutable(uint)[] wideUnits = wide.representation;
    assert(cast(size_t) wide.ptr == cast(size_t) wideUnits.ptr);
}
/// ubyte[] -> char[] :: std.string.assumeUTF
/// - throws AssertError in debug builds if not valid UTF
/// - @safe
/// - non-allocating (the result starts at the same address as the input)
/// - preserves qualifiers; also works for wstring/dstring
@("std.string.assumeUTF")
nothrow @safe @nogc unittest {
    import std.string : assumeUTF;

    // The two code units of "hi".
    immutable(ubyte)[2] rawUnits = [104, 105];
    string text = rawUnits.assumeUTF;

    assert(text == "hi");
    // Same storage, merely retyped.
    assert(cast(size_t) rawUnits.ptr == cast(size_t) text.ptr);
}
/// ubyte[] -> char[] :: cast(char[])
/// - can lead to decode errors later if the input isn't valid UTF
/// - @safe when qualifiers aren't discarded
/// - non-allocating (the result starts at the same address as the input)
/// To check the input first, consider std.encoding.isValid (returns a bool)
/// or std.utf.validate (throws UTFException).
@("cast(char[]) bytearray")
nothrow @safe @nogc unittest {
    import std.string : representation;

    immutable(ubyte)[] rawUnits = "hello world".representation;
    // Qualifier is kept (immutable -> immutable), so the cast is allowed here.
    string text = cast(immutable(char)[]) rawUnits;
    assert(cast(size_t) rawUnits.ptr == cast(size_t) text.ptr);
}
/// string mutability :: .dup, .idup, std.exception.assumeUnique, std.array.array
@(".dup .idup .assumeUnique .array")
@safe nothrow unittest {
    import std.algorithm : map;
    import std.array : array;
    import std.exception : assumeUnique, assumeWontThrow;

    // .dup: copy a string literal to obtain a fresh mutable array.
    char[] mutableCopy = "hello world".dup;
    // .idup: copy a mutable array to obtain a fresh immutable string.
    string frozenCopy = mutableCopy.idup;

    // assumeUnique: unsafe, but doesn't allocate or copy.
    (() @trusted @nogc {
        string retyped = mutableCopy.assumeUnique;
    })();

    // .map is here only to illustrate: string -> range of char -> char[].
    // Iterating the string can throw if it isn't valid UTF, hence
    // assumeWontThrow inside this nothrow test.
    // .array copies the range into a new mutable array.
    char[] shifted = frozenCopy
        .map!(ch => cast(char)(cast(ubyte) ch + 1))
        .array
        .assumeWontThrow;
    assert(shifted == "ifmmp!xpsme");
}
/// char range autodecoding
/// Not a type conversion in itself, but a frequent reason to reach for one.
@("avoiding autodecoding")
@safe unittest {
    import std.algorithm : count, map, sum;
    import std.encoding : isValid;
    import std.exception : assertThrown;
    import std.string : representation;
    import std.utf : byCodeUnit, UTFException, validate;

    char[] bogus = [167, 133, 175];

    // Both validators agree that this is not a valid UTF-8 sequence.
    assertThrown!UTFException(bogus.validate);
    assert(!bogus.isValid);

    // .count of a char[] happens to not autodecode ...
    assert(bogus.count == 3);
    // ... but .map does, and trips over the invalid sequence.
    assertThrown!UTFException(bogus.map!"1".sum == 3);

    // Viewing the data as ubyte[] avoids autodecoding.
    assert(bogus.representation.map!"1".sum == 3);
    // byCodeUnit avoids it as well while keeping char as the element type.
    assert(bogus.byCodeUnit.map!"1".sum == 3);
}
/// string literal -> C string :: address of first char
/// - string literals always have a trailing NUL
/// - string literals also coerce directly to C strings
/// This test only demonstrates that both forms compile under
/// nothrow @safe @nogc; it asserts nothing at run time.
@("string literal conversion to C strings")
nothrow @safe @nogc unittest {
    string literal = "hello world";
    // Form 1: take the address of the first character.
    const(char)* viaAddress = &literal[0];
    // Form 2: let the literal convert to a pointer implicitly.
    const(char)* viaCoercion = "hello world";
}
/// Reads one element past the end of a literal-backed string to show the
/// terminating NUL. Uses raw pointer access, so the test is not @safe.
@("string literals have trailing NUL")
nothrow @nogc unittest {
    string literal = "hello world";
    const(char)* onePastEnd = literal.ptr + literal.length;
    assert(*onePastEnd == '\0');
}
/// string -> C string :: std.string.toStringz
/// - works for any D string, including slices not followed by a NUL
/// - @safe
/// - *always* allocates (author's note; the test below only verifies that the
///   result for the leading slice does not alias the source)
/// - preserves qualifiers
@("std.string.toStringz")
nothrow @safe unittest {
    import std.string : toStringz;

    string source = "hello world";
    const(char)* head = source[0 .. 5].toStringz;
    const(char)* tail = source[$ - 5 .. $].toStringz;

    // Slicing a raw pointer and calling strlen are not @safe operations.
    (() @trusted {
        import core.stdc.string : strlen;

        assert(head[0 .. strlen(head)] == "hello");
        assert(tail[0 .. strlen(tail)] == "world");
        // The leading slice was copied rather than returned in place.
        assert(source.ptr != head);
    })();
}
/// C string -> string :: std.string.fromStringz
/// - just slices the pointer (the result starts at the same address)
/// - doesn't allocate
/// - not @safe because it assumes NUL termination
@("std.string.fromStringz")
nothrow @nogc unittest {
    import core.stdc.string : strlen;
    import std.string : fromStringz;

    const(char)* cstr = "hello world";
    const(char)[] view = cstr.fromStringz;

    // Same starting address ...
    assert(view.ptr == cstr);
    // ... and the same extent strlen reports.
    assert(cstr[0 .. strlen(cstr)] == view);
}
/// char[] <-> ubyte[] aren't symmetric:
///   char[] -> ubyte[] is always safe, error-free, and cheap
///   char[] <- ubyte[] has the possibility of encoding failure
/// so the char[] <- ubyte[] options above are convenient but somewhat sloppy;
/// for precise operations, use the std.utf tooling.
@("std.utf.decodeFront")
unittest {
    import std.algorithm : equal;
    import std.encoding : validLength;

    const(char)[] mixed = [
        'h', 'e', 'l', 'l', 'o', // valid UTF-8
        167, 133, 175,           // invalid
        't', 'h', 'e', 'r', 'e'  // valid
    ];

    // Decode code points from the front of `s` until it is exhausted or the
    // first UTFException is raised; return whatever was decoded so far.
    dstring takeUTF(const(char)[] s) {
        import std.utf : decodeFront, UTFException;

        dstring decoded;
        try {
            while (s.length)
                decoded ~= s.decodeFront;
        } catch (UTFException _) {
            // Stop at the first sequence that fails to decode.
        }
        return decoded;
    }

    dstring prefix = takeUTF(mixed);
    assert(prefix == "hello");

    // Author's note: most of this file was written without being aware of
    // std.encoding at all; validLength yields the same valid prefix.
    assert(prefix.equal(mixed[0 .. mixed.validLength]));
}
/// char[] <-> wchar[] <-> dchar[] :: std.utf.byUTF family
/// - infallible (ignoring errors or using the Unicode 'replacement character')
/// - @safe
/// - non-allocating
/// Walks the same text as UTF-8, UTF-16 and UTF-32 code units and compares
/// each unit against a hand-written table.
@("std.utf.byUTF 1/3")
nothrow @nogc @safe unittest {
    import std.range : enumerate;
    import std.utf : byChar, byDchar, byWchar;

    // "noel" with a combining diaeresis (U+0308, decimal 776) after the 'o'.
    string noel = "no\u0308el";
    char[6] chars = [110, 111, 204, 136, 101, 108];  // U+0308 is two UTF-8 units
    wchar[5] wchars = [110, 111, 776, 101, 108];     // one UTF-16 unit
    dchar[5] dchars = [110, 111, 776, 101, 108];     // one UTF-32 unit

    foreach (size_t i, char c; noel.byChar.enumerate)
        assert(c == chars[i]);
    // byWchar was imported but never exercised before; cover it as well.
    foreach (size_t i, wchar c; noel.byWchar.enumerate)
        assert(c == wchars[i]);
    foreach (size_t i, dchar c; noel.byDchar.enumerate)
        assert(c == dchars[i]);
}
/// byUTF can throw only when the Unicode replacement character is disabled.
@("std.utf.byUTF 2/3 (throwing)")
@safe unittest {
    import std.algorithm : count;
    import std.exception : assertNotThrown, assertThrown;
    import std.utf : byUTF, UseReplacementDchar, UTFException;

    // Valid text with three invalid bytes in the middle.
    string partial = "hello\247\205\257there";
    enum strict = UseReplacementDchar.no;

    // Iterating as char: the bad UTF-8 goes unnoticed ...
    assertNotThrown!UTFException(partial.byUTF!(char, strict).count);
    // ... whereas iterating as dchar objects to it.
    assertThrown!UTFException(partial.byUTF!(dchar, strict).count);
}
/// With the replacement character enabled, byUTF does not throw: iterating as
/// char passes the invalid bytes through unchanged, while iterating as dchar
/// yields U+FFFD (decimal 65533) for each of them. byDchar behaves like
/// byUTF!(dchar, UseReplacementDchar.yes) here.
@("std.utf.byUTF 3/3 (replacement character)")
nothrow @nogc @safe unittest {
    import std.range : enumerate;
    import std.utf : byDchar, byUTF, UseReplacementDchar;

    // Valid text with three invalid bytes in the middle.
    string partial = "hello\247\205\257there";
    dchar[13] replaced = [104, 101, 108, 108, 111, 65533, 65533, 65533, 116, 104, 101, 114, 101];

    foreach (size_t i, char c; partial.byUTF!(char, UseReplacementDchar.yes).enumerate)
        assert(c == partial[i]); // invalid UTF ignored and passed through
    foreach (size_t i, dchar c; partial.byUTF!(dchar, UseReplacementDchar.yes).enumerate)
        assert(c == replaced[i]);
    foreach (size_t i, dchar c; partial.byDchar.enumerate)
        assert(c == replaced[i]);
}
/// byUTF returns a range, so it can replace characters within @nogc code;
/// sanitize has a simpler interface that allocates and isn't @safe.
/// So which should you use?
/// Author's measurements: sanitize is *much* faster, more than twice as fast
/// as the simple byDchar.array competition, and still 20% faster than
/// non-allocating byDchar uses.
/// ... and that is when sanitization is actually necessary. In the very
/// common case of already sanitized input, sanitize is 4-6x faster and
/// doesn't allocate.
@("std.encoding.sanitize")
nothrow unittest {
    import std.encoding : sanitize;
    import std.utf : toUTF8;

    // Valid text with three invalid bytes in the middle.
    string partial = "hello\247\205\257there";
    // Same text with each invalid byte replaced by U+FFFD (decimal 65533).
    dchar[] replaced = [104, 101, 108, 108, 111, 65533, 65533, 65533, 116, 104, 101, 114, 101];

    string expected = replaced.toUTF8;
    assert(partial.sanitize == expected);
}
/// string <-> wstring <-> dstring <-> C string :: std.utf.toUTF* family
/// - infallible (in the same way as the byUTF family)
/// - @safe
/// - always allocates
/// Only toUTF8/toUTF16/toUTF32 are exercised here; the C-string variants
/// (toUTFz, toUTF16z) are not covered by this test.
@("std.utf.toUTF 1/2")
nothrow @safe unittest {
    import std.utf : toUTF8, toUTF16, toUTF32;

    // "noel" with a combining diaeresis (U+0308, decimal 776) after the 'o'.
    string noel = "no\u0308el";
    char[6] chars = [110, 111, 204, 136, 101, 108];  // U+0308 is two UTF-8 units
    wchar[5] wchars = [110, 111, 776, 101, 108];     // one UTF-16 unit
    dchar[5] dchars = [110, 111, 776, 101, 108];     // one UTF-32 unit

    assert(noel.toUTF8 == chars);
    assert(noel.toUTF16 == wchars);
    assert(noel.toUTF32 == dchars);
}
/// toUTF* on input containing invalid UTF-8: toUTF8 returns an equal string
/// in different storage, and toUTF32 yields U+FFFD (decimal 65533) for each
/// invalid byte. Also round-trips plain ASCII between UTF-8 and UTF-32.
@("std.utf.toUTF 2/2")
nothrow @safe unittest {
    import std.utf : toUTF8, toUTF32;

    // Valid text with three invalid bytes in the middle.
    string partial = "hello\247\205\257there";
    dchar[13] replaced = [104, 101, 108, 108, 111, 65533, 65533, 65533, 116, 104, 101, 114, 101];

    // Same-encoding conversion: contents are equal but the result is a copy.
    assert(partial.toUTF8 == partial);
    assert(partial.toUTF8.ptr != partial.ptr);
    assert(partial.toUTF32 == replaced);

    // Cross-encoding conversions; previously computed but never checked.
    string hello = "hello"d.toUTF8;
    assert(hello == "hello");
    dstring there = "there".toUTF32;
    assert(there == "there"d);
}
/// string <-> wstring <-> dstring :: std.conv.to
/// - usually fallible, throws on invalid UTF
/// - @safe
/// - usually allocates (doesn't when idempotent)
@("std.conv.to 1/4 (can throw)")
@safe unittest {
    import std.conv : to;
    import std.exception : assertThrown;
    import std.utf : UTFException;

    // Valid text with three invalid bytes in the middle.
    string partial = "hello\247\205\257there";
    assertThrown!UTFException(partial.to!dstring);
}
/// Converting a dstring to dstring with std.conv.to returns the same storage,
/// which is why this case can be nothrow @nogc.
@("std.conv.to 2/4 (idempotent use)")
nothrow @nogc @safe unittest {
    import std.conv : to;

    dstring original = "hello"d;
    dstring converted = original.to!dstring;
    // Same pointer: no copy was made.
    assert(original.ptr == converted.ptr);
}
/// std.conv.to does not reinterpret text as bytes: asking for ubyte[] from a
/// const(char)[] holding "hello" throws ConvException. Use
/// std.string.representation for that conversion instead.
@("std.conv.to 3/4 (doesn't understand char[] -> ubyte[])")
@safe unittest {
    import std.conv : ConvException, to;
    import std.exception : assertThrown;

    const(char)[] text = "hello";
    assertThrown!ConvException(text.to!(ubyte[]));
}
/// std.conv.to!string on a ubyte array formats the numbers as a list literal
/// rather than interpreting the bytes as text.
@("std.conv.to 4/4 (conv doesn't do what you want with ubyte[] -> string)")
@safe unittest {
    import std.conv : to;

    // The code units of "hello" (104, 101, 108, 108, 111).
    ubyte[5] rawUnits = ['h', 'e', 'l', 'l', 'o'];
    assert(rawUnits.to!string == "[104, 101, 108, 108, 111]");
}