Skip to content

Commit d636e33

Browse files
Define a mutable string type
1 parent 6d908aa commit d636e33

File tree

2 files changed

+434
-0
lines changed

2 files changed

+434
-0
lines changed

src/common/src/mlib/str.h

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,4 +565,353 @@ mstr_contains_any_of(mstr_view str, mstr_view needle)
565565
}
566566
// Convenience wrapper: passes both arguments through `mstr_view_from` and forwards the results to the function `mstr_contains_any_of`.
#define mstr_contains_any_of(Str, Needle) mstr_contains_any_of(mstr_view_from(Str), mstr_view_from(Needle))
567567

568+
/**
 * @brief A simple mutable string type, with a guaranteed null terminator.
 *
 * This is a trivially relocatable aggregate holding a pointer `data` and a
 * size `len`. When `data` is not null it points to an array of mutable `char`
 * of length `len + 1`. The character at `data[len]` is always zero and must
 * not be modified.
 */
struct mstr {
    /**
     * @brief Pointer to the first char in the string, or NULL if the string
     * is null.
     *
     * The pointed-to character array has a length of `len + 1`, and the
     * character at `data[len]` is always the null terminator.
     *
     * @warning Attempting to overwrite the null character at `data[len]`
     * will result in undefined behavior!
     *
     * @note An empty string is not equivalent to a null string! An empty
     * string still points to an array of length 1 whose only char is the
     * null terminator.
     */
    char *data;
    /**
     * @brief The number of characters in the array pointed-to by `data`
     * that precede the null terminator.
     */
    size_t len;
};

typedef struct mstr mstr;
599+
600+
601+
/**
602+
* @brief Resize an existing or null `mstr`, without initializing any of the
603+
* added content other than the null terminator. This operation is potientially
604+
* UNSAFE, because it gives uninitialized memory to the caller.
605+
*
606+
* @param str Pointer to a valid `mstr`, or a null `mstr`.
607+
* @param new_len The new length of the string.
608+
* @return true If the operation succeeds
609+
* @return false Otherwise
610+
*
611+
* If `str` is a null string, this function will initialize a new `mstr` object
612+
* on-the-fly.
613+
*
614+
* If the operation increases the length of the string (or initializes a new string),
615+
* then the new `char` in `str.data[str.len : new_len] will contain uninitialized
616+
* values. The char at `str.data[new_len]` WILL be set to zero, to ensure there
617+
* is a null terminator. The caller should always initialize the new string
618+
* content to ensure that the string has a specified value.
619+
*/
620+
static inline bool
621+
mstr_resize_for_overwrite(mstr *const str, const size_t new_len)
622+
{
623+
// We need to allocate one additional char to hold the null terminator
624+
size_t alloc_size = new_len;
625+
if (mlib_add(&alloc_size, 1) || alloc_size > SSIZE_MAX) {
626+
// Allocation size is too large
627+
return false;
628+
}
629+
// Try to (re)allocate the region
630+
char *data = (char *)realloc(str->data, alloc_size);
631+
if (!data) {
632+
// Failed to (re)allocate
633+
return false;
634+
}
635+
// Note: We do not initialize any of the data in the newly allocated region.
636+
// We only set the null terminator. It is up to the caller to do the rest of
637+
// the init.
638+
data[new_len] = 0;
639+
// Update the final object
640+
str->data = data;
641+
str->len = new_len;
642+
// Success
643+
return true;
644+
}
645+
646+
/**
647+
* @brief Given an existing `mstr`, resize it to hold `new_len` chars
648+
*
649+
* @param str Pointer to a string object to update, or a null `mstr`
650+
* @param new_len The new length of the string, not including the implicit null terminator
651+
* @return true If the operation succeeds
652+
* @return false Otherwise
653+
*
654+
* @note If the operation fails, then `*str` is not modified.
655+
*/
656+
static inline bool
657+
mstr_resize(mstr *str, size_t new_len)
658+
{
659+
const size_t old_len = str->len;
660+
if (!mstr_resize_for_overwrite(str, new_len)) {
661+
// Failed to allocate new storage for the string
662+
return false;
663+
}
664+
// Check how many chars we added/removed
665+
const ptrdiff_t len_diff = new_len - str->len;
666+
if (len_diff > 0) {
667+
// We added new chars. Zero-init all the new chars
668+
memset(str->data + old_len, 0, (size_t)len_diff);
669+
}
670+
// Success
671+
return true;
672+
}
673+
674+
/**
675+
* @brief Create a new `mstr` of the given length
676+
*
677+
* @param new_len The length of the new string, in characters, not including the null terminator
678+
* @return mstr A new string. The string's `data` member is NULL in case of failure
679+
*
680+
* The character array allocated for the string will always be `new_len + 1` `char` in length,
681+
* where the char at the index `new_len` is a null terminator. This means that a string of
682+
* length zero will allocate a single character to store the null terminator.
683+
*
684+
* All characters in the new string are initialize to zero. If you want uninitialized
685+
* string content, use `mstr_resize_for_overwrite`.
686+
*/
687+
static inline mstr
688+
mstr_new(size_t new_len)
689+
{
690+
mstr ret = {NULL, 0};
691+
// We can rely on `resize` to handle the null state properly.
692+
mstr_resize(&ret, new_len);
693+
return ret;
694+
}
695+
696+
/**
697+
* @brief Delete an `mstr` that was created with an allocating API, including
698+
* the resize APIs
699+
*
700+
* @param s An `mstr` object. If the object is null, this function is a no-op.
701+
*
702+
* After this call, the value of the `s` object has been consumed and is invalid.
703+
*/
704+
static inline void
705+
mstr_delete(mstr s)
706+
{
707+
free(s.data);
708+
}
709+
710+
/**
711+
* @brief Replace the content of the given string, attempting to reuse the buffer
712+
*
713+
* @param inout Pointer to a valid or null `mstr` to be replaced
714+
* @param s The new string contents
715+
* @return true If the operation succeeded
716+
* @return false Otherwise
717+
*
718+
* If the operation fails, `*inout` is not modified
719+
*/
720+
static inline bool
721+
mstr_assign(mstr *inout, mstr_view s)
722+
{
723+
if (!mstr_resize_for_overwrite(inout, s.len)) {
724+
return false;
725+
}
726+
memcpy(inout->data, s.data, s.len);
727+
return true;
728+
}
729+
730+
#define mstr_assign(InOut, S) mstr_assign((InOut), mstr_view_from((S)))
731+
732+
/**
733+
* @brief Create a mutable copy of the given string.
734+
*
735+
* @param sv The string to be copied
736+
* @return mstr A new valid string, or a null string in case of allocation failure.
737+
*/
738+
static inline mstr
739+
mstr_copy(mstr_view sv)
740+
{
741+
mstr ret = {NULL, 0};
742+
mstr_assign(&ret, sv);
743+
return ret;
744+
}
745+
746+
#define mstr_copy(S) mstr_copy(mstr_view_from((S)))
747+
#define mstr_copy_cstring(S) mstr_copy(mstr_cstring((S)))
748+
749+
/**
750+
* @brief Concatenate two strings into a new mutable string
751+
*
752+
* @param a The left-hand string to be concatenated
753+
* @param b The right-hand string to be concatenated
754+
* @return mstr A new valid string composed by concatenating `a` with `b`, or
755+
* a null string in case of allocation failure.
756+
*/
757+
static inline mstr
758+
mstr_concat(mstr_view a, mstr_view b)
759+
{
760+
mstr ret = {NULL, 0};
761+
size_t cat_len;
762+
if (mlib_add(&cat_len, a.len, b.len)) {
763+
// Size would overflow. No go.
764+
return ret;
765+
}
766+
// Prepare the new string
767+
if (!mstr_resize_for_overwrite(&ret, cat_len)) {
768+
// Failed to allocate. The ret string is still null, and we can just return it
769+
return ret;
770+
}
771+
// Copy in the characters from `a`
772+
char *out = ret.data;
773+
memcpy(out, a.data, a.len);
774+
// Copy in the characters from `b`
775+
out += a.len;
776+
memcpy(out, b.data, b.len);
777+
// Success
778+
return ret;
779+
}
780+
781+
#define mstr_concat(A, B) mstr_concat(mstr_view_from((A)), mstr_view_from((B)))
782+
783+
/**
784+
* @brief Delete and/or insert characters into a string
785+
*
786+
* @param str The string object to be updated
787+
* @param splice_pos The position at which to do the splice
788+
* @param n_delete The number of characters to delete at `splice_pos`
789+
* @param insert A string to be inserted at `split_pos` after chars are deleted
790+
* @return true If the operation succeeds
791+
* @return false Otherwise
792+
*
793+
* If `n_delete` is zero, then no characters are deleted. If `insert` is empty
794+
* or null, then no characters are inserted.
795+
*/
796+
static inline bool
797+
mstr_splice(mstr *str, size_t splice_pos, size_t n_delete, mstr_view insert)
798+
{
799+
mlib_check(splice_pos <= str->len);
800+
// How many chars is it possible to delete from `splice_pos`?
801+
size_t n_chars_avail_to_delete = str->len - splice_pos;
802+
mlib_check(n_delete <= n_chars_avail_to_delete);
803+
// Compute the new string length
804+
size_t new_len = str->len;
805+
// This should never fail, because we should try to delete more chars than we have
806+
mlib_check(!mlib_sub(&new_len, n_delete));
807+
// Check if appending would make too big of a string
808+
if (mlib_add(&new_len, insert.len)) {
809+
// New string will be too long
810+
return false;
811+
}
812+
char *mut = str->data;
813+
// We either resize first or resize last, depending on where we are shifting chars
814+
if (new_len > str->len) {
815+
// Do the resize first
816+
if (!mstr_resize_for_overwrite(str, new_len)) {
817+
// Failed to allocate
818+
return false;
819+
}
820+
mut = str->data;
821+
}
822+
// Move to the splice position
823+
mut += splice_pos;
824+
// Shift the existing string parts around for the deletion operation
825+
const size_t tail_len = n_chars_avail_to_delete - n_delete;
826+
// Adjust to the begining of the string part that we want to keep
827+
char *copy_from = mut + n_delete;
828+
char *copy_to = mut + insert.len;
829+
memmove(copy_to, copy_from, tail_len);
830+
if (new_len < str->len) {
831+
// We didn't resize first, so resize now. We are shrinking the string, so this
832+
// will never fail, and does not create any uninitialized memory:
833+
mlib_check(mstr_resize_for_overwrite(str, new_len));
834+
mut = str->data + splice_pos;
835+
}
836+
// Insert the new data
837+
memcpy(mut, insert.data, insert.len);
838+
return true;
839+
}
840+
841+
/**
842+
* @brief Append a string to the end of some other string.
843+
*
844+
* @param str The string to be modified
845+
* @param suffix The suffix string to be appended onto `*str`
846+
* @return true If the operation was successful
847+
* @return false Otherwise
848+
*
849+
* If case of failure, `*str` is not modified.
850+
*/
851+
static inline bool
852+
mstr_append(mstr *str, mstr_view suffix)
853+
{
854+
return mstr_splice(str, str->len, 0, suffix);
855+
}
856+
857+
#define mstr_append(Into, Suffix) mstr_append((Into), mstr_view_from((Suffix)))
858+
859+
/**
860+
* @brief Append a single character to the given string object
861+
*
862+
* @param str The string object to be updated
863+
* @param c The single character that will be inserted at the end
864+
* @return true If the operation succeeded
865+
* @return false Otherwise
866+
*
867+
* In case of failure, the string is not modified.
868+
*/
869+
static inline bool
870+
mstr_pushchar(mstr *str, char c)
871+
{
872+
mstr_view one = mstr_view_data(&c, 1);
873+
return mstr_append(str, one);
874+
}
875+
876+
/**
877+
* @brief Replace every occurrence of `needle` in `str` with `sub`
878+
*
879+
* @param str The string object to be updated
880+
* @param needle The non-empty needle string to be searched for.s
881+
* @param sub The string to be inserted in place of each `needle`
882+
* @return true If the operation succeeds
883+
* @return false Otherwise
884+
*
885+
* @warning If the `needle` string is empty, then the program will terminate!
886+
* @note If the operation fails, the content of `str` is an unspecified but valid
887+
* string.
888+
*/
889+
static inline bool
890+
mstr_replace(mstr *str, mstr_view needle, mstr_view sub)
891+
{
892+
mlib_check(needle.len, neq, 0, because, "Trying to replace an empty string will result in an infinite loop");
893+
// Scan forward, starting from the first position:
894+
size_t off = 0;
895+
while (1) {
896+
// Find the next occurrence, starting from the scan offset
897+
off = mstr_find(*str, needle, off);
898+
if (off == SIZE_MAX) {
899+
// No more occurrences.
900+
return true;
901+
}
902+
// Replace the needle string with the new value
903+
if (!mstr_splice(str, off, needle.len, sub)) {
904+
return false;
905+
}
906+
// Advance over the length of the replacement string, so we don't try to
907+
// infinitely replace content if the replacement itself contains the needle
908+
// string
909+
if (mlib_add(&off, sub.len)) {
910+
// Integer overflow while advancing the offset. No good.
911+
return false;
912+
}
913+
}
914+
}
915+
916+
568917
#endif // MLIB_STR_H_INCLUDED

0 commit comments

Comments
 (0)