2024-11-20 05:00:33 +02:00

1508 lines
50 KiB
C

/*
MIT License
Copyright (c) 2024 Leonard Iklé
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <assert.h>
#include <ctype.h>
#include <cwalk.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
/**
* We try to default to a different path style depending on the operating
* system. So this should detect whether we should use windows or unix paths.
*/
/*#if defined(WIN32) || defined(_WIN32) || \
defined(__WIN32) && !defined(__CYGWIN__)
static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;
#else
static enum cwk_path_style path_style = CWK_STYLE_UNIX;
#endif*/
/* This will extract the path style of the running process */
extern enum cwk_path_style *__cwalk_path_style(void);
#define path_style (*__cwalk_path_style())
/**
* This is a list of separators used in different styles. Windows can read
* multiple separators, but it generally outputs just a backslash. The output
* will always use the first character for the output.
*/
static const char *separators[] = {
"\\/", // CWK_STYLE_WINDOWS
"/" // CWK_STYLE_UNIX
};
/**
* A joined path represents multiple path strings which are concatenated, but
* not (necessarily) stored in contiguous memory. The joined path allows to
* iterate over the segments as if it was one piece of path.
*/
struct cwk_segment_joined
{
struct cwk_segment segment;
const char **paths;
size_t path_index;
};
static size_t cwk_path_output_sized(char *buffer, size_t buffer_size,
size_t position, const char *str, size_t length)
{
size_t amount_written;
// First we determine the amount which we can write to the buffer. There are
// three cases. In the first case we have enough to store the whole string in
// it. In the second one we can only store a part of it, and in the third we
// have no space left.
if (buffer_size > position + length) {
amount_written = length;
} else if (buffer_size > position) {
amount_written = buffer_size - position;
} else {
amount_written = 0;
}
// If we actually want to write out something we will do that here. We will
// always append a '\0', this way we are guaranteed to have a valid string at
// all times.
if (amount_written > 0) {
memmove(&buffer[position], str, amount_written);
}
// Return the theoretical length which would have been written when everything
// would have fit in the buffer.
return length;
}
static size_t cwk_path_output_current(char *buffer, size_t buffer_size,
size_t position)
{
// We output a "current" directory, which is a single character. This
// character is currently not style dependant.
return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);
}
static size_t cwk_path_output_back(char *buffer, size_t buffer_size,
size_t position)
{
// We output a "back" directory, which ahs two characters. This
// character is currently not style dependant.
return cwk_path_output_sized(buffer, buffer_size, position, "..", 2);
}
static size_t cwk_path_output_separator(char *buffer, size_t buffer_size,
size_t position)
{
// We output a separator, which is a single character.
return cwk_path_output_sized(buffer, buffer_size, position,
separators[path_style], 1);
}
static size_t cwk_path_output_dot(char *buffer, size_t buffer_size,
size_t position)
{
// We output a dot, which is a single character. This is used for extensions.
return cwk_path_output_sized(buffer, buffer_size, position, ".", 1);
}
static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position,
const char *str)
{
size_t length;
// This just does a sized output internally, but first measuring the
// null-terminated string.
length = strlen(str);
return cwk_path_output_sized(buffer, buffer_size, position, str, length);
}
static void cwk_path_terminate_output(char *buffer, size_t buffer_size,
size_t pos)
{
if (buffer_size > 0) {
if (pos >= buffer_size) {
buffer[buffer_size - 1] = '\0';
} else {
buffer[pos] = '\0';
}
}
}
static bool cwk_path_is_string_equal(const char *first, const char *second,
size_t first_size, size_t second_size)
{
bool are_both_separators;
// The two strings are not equal if the sizes are not equal.
if (first_size != second_size) {
return false;
}
// If the path style is UNIX, we will compare case sensitively. This can be
// done easily using strncmp.
if (path_style == CWK_STYLE_UNIX) {
return strncmp(first, second, first_size) == 0;
}
// However, if this is windows we will have to compare case insensitively.
// Since there is no standard method to do that we will have to do it on our
// own.
while (*first && *second && first_size > 0) {
// We can consider the string to be not equal if the two lowercase
// characters are not equal. The two chars may also be separators, which
// means they would be equal.
are_both_separators = strchr(separators[path_style], *first) != NULL &&
strchr(separators[path_style], *second) != NULL;
if (tolower(*first) != tolower(*second) && !are_both_separators) {
return false;
}
first++;
second++;
--first_size;
}
// The string must be equal since they both have the same length and all the
// characters are the same.
return true;
}
static const char *cwk_path_find_next_stop(const char *c)
{
// We just move forward until we find a '\0' or a separator, which will be our
// next "stop".
while (*c != '\0' && !cwk_path_is_separator(c)) {
++c;
}
// Return the pointer of the next stop.
return c;
}
static const char *cwk_path_find_previous_stop(const char *begin, const char *c)
{
// We just move back until we find a separator or reach the beginning of the
// path, which will be our previous "stop".
while (c > begin && !cwk_path_is_separator(c)) {
--c;
}
// Return the pointer to the previous stop. We have to return the first
// character after the separator, not on the separator itself.
if (cwk_path_is_separator(c)) {
return c + 1;
} else {
return c;
}
}
static bool cwk_path_get_first_segment_without_root(const char *path,
const char *segments, struct cwk_segment *segment)
{
// Let's remember the path. We will move the path pointer afterwards, that's
// why this has to be done first.
segment->path = path;
segment->segments = segments;
segment->begin = segments;
segment->end = segments;
segment->size = 0;
// Now let's check whether this is an empty string. An empty string has no
// segment it could use.
if (*segments == '\0') {
return false;
}
// If the string starts with separators, we will jump over those. If there is
// only a slash and a '\0' after it, we can't determine the first segment
// since there is none.
while (cwk_path_is_separator(segments)) {
++segments;
if (*segments == '\0') {
return false;
}
}
// So this is the beginning of our segment.
segment->begin = segments;
// Now let's determine the end of the segment, which we do by moving the path
// pointer further until we find a separator.
segments = cwk_path_find_next_stop(segments);
// And finally, calculate the size of the segment by subtracting the position
// from the end.
segment->size = (size_t)(segments - segment->begin);
segment->end = segments;
// Tell the caller that we found a segment.
return true;
}
static bool cwk_path_get_last_segment_without_root(const char *path,
struct cwk_segment *segment)
{
// Now this is fairly similar to the normal algorithm, however, it will assume
// that there is no root in the path. So we grab the first segment at this
// position, assuming there is no root.
if (!cwk_path_get_first_segment_without_root(path, path, segment)) {
return false;
}
// Now we find our last segment. The segment struct of the caller
// will contain the last segment, since the function we call here will not
// change the segment struct when it reaches the end.
while (cwk_path_get_next_segment(segment)) {
// We just loop until there is no other segment left.
}
return true;
}
static bool cwk_path_get_first_segment_joined(const char **paths,
struct cwk_segment_joined *sj)
{
bool result;
// Prepare the first segment. We position the joined segment on the first path
// and assign the path array to the struct.
sj->path_index = 0;
sj->paths = paths;
// We loop through all paths until we find one which has a segment. The result
// is stored in a variable, so we can let the caller know whether we found one
// or not.
result = false;
while (paths[sj->path_index] != NULL &&
(result = cwk_path_get_first_segment(paths[sj->path_index],
&sj->segment)) == false) {
++sj->path_index;
}
return result;
}
static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj)
{
bool result;
if (sj->paths[sj->path_index] == NULL) {
// We reached already the end of all paths, so there is no other segment
// left.
return false;
} else if (cwk_path_get_next_segment(&sj->segment)) {
// There was another segment on the current path, so we are good to
// continue.
return true;
}
// We try to move to the next path which has a segment available. We must at
// least move one further since the current path reached the end.
result = false;
do {
++sj->path_index;
// And we obviously have to stop this loop if there are no more paths left.
if (sj->paths[sj->path_index] == NULL) {
break;
}
// Grab the first segment of the next path and determine whether this path
// has anything useful in it. There is one more thing we have to consider
// here - for the first time we do this we want to skip the root, but
// afterwards we will consider that to be part of the segments.
result = cwk_path_get_first_segment_without_root(sj->paths[sj->path_index],
sj->paths[sj->path_index], &sj->segment);
} while (!result);
// Finally, report the result back to the caller.
return result;
}
static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj)
{
bool result;
if (*sj->paths == NULL) {
// It's possible that there is no initialized segment available in the
// struct since there are no paths. In that case we can return false, since
// there is no previous segment.
return false;
} else if (cwk_path_get_previous_segment(&sj->segment)) {
// Now we try to get the previous segment from the current path. If we can
// do that successfully, we can let the caller know that we found one.
return true;
}
result = false;
do {
// We are done once we reached index 0. In that case there are no more
// segments left.
if (sj->path_index == 0) {
break;
}
// There is another path which we have to inspect. So we decrease the path
// index.
--sj->path_index;
// If this is the first path we will have to consider that this path might
// include a root, otherwise we just treat is as a segment.
if (sj->path_index == 0) {
result = cwk_path_get_last_segment(sj->paths[sj->path_index],
&sj->segment);
} else {
result = cwk_path_get_last_segment_without_root(sj->paths[sj->path_index],
&sj->segment);
}
} while (!result);
return result;
}
static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj)
{
enum cwk_segment_type type;
int counter;
// We are handling back segments here. We must verify how many back segments
// and how many normal segments come before this one to decide whether we keep
// or remove it.
// The counter determines how many normal segments are our current segment,
// which will popped off before us. If the counter goes above zero it means
// that our segment will be popped as well.
counter = 0;
// We loop over all previous segments until we either reach the beginning,
// which means our segment will not be dropped or the counter goes above zero.
while (cwk_path_get_previous_segment_joined(sj)) {
// Now grab the type. The type determines whether we will increase or
// decrease the counter. We don't handle a CWK_CURRENT frame here since it
// has no influence.
type = cwk_path_get_segment_type(&sj->segment);
if (type == CWK_NORMAL) {
// This is a normal segment. The normal segment will increase the counter
// since it neutralizes one back segment. If we go above zero we can
// return immediately.
++counter;
if (counter > 0) {
return true;
}
} else if (type == CWK_BACK) {
// A CWK_BACK segment will reduce the counter by one. We can not remove a
// back segment as long we are not above zero since we don't have the
// opposite normal segment which we would remove.
--counter;
}
}
// We never got a count larger than zero, so we will keep this segment alive.
return false;
}
static bool cwk_path_segment_normal_will_be_removed(
struct cwk_segment_joined *sj)
{
enum cwk_segment_type type;
int counter;
// The counter determines how many segments are above our current segment,
// which will popped off before us. If the counter goes below zero it means
// that our segment will be popped as well.
counter = 0;
// We loop over all following segments until we either reach the end, which
// means our segment will not be dropped or the counter goes below zero.
while (cwk_path_get_next_segment_joined(sj)) {
// First, grab the type. The type determines whether we will increase or
// decrease the counter. We don't handle a CWK_CURRENT frame here since it
// has no influence.
type = cwk_path_get_segment_type(&sj->segment);
if (type == CWK_NORMAL) {
// This is a normal segment. The normal segment will increase the counter
// since it will be removed by a "../" before us.
++counter;
} else if (type == CWK_BACK) {
// A CWK_BACK segment will reduce the counter by one. If we are below zero
// we can return immediately.
--counter;
if (counter < 0) {
return true;
}
}
}
// We never got a negative count, so we will keep this segment alive.
return false;
}
static bool
cwk_path_segment_will_be_removed(const struct cwk_segment_joined *sj,
bool absolute)
{
enum cwk_segment_type type;
struct cwk_segment_joined sjc;
// We copy the joined path so we don't need to modify it.
sjc = *sj;
// First we check whether this is a CWK_CURRENT or CWK_BACK segment, since
// those will always be dropped.
type = cwk_path_get_segment_type(&sj->segment);
if (type == CWK_CURRENT || (type == CWK_BACK && absolute)) {
return true;
} else if (type == CWK_BACK) {
return cwk_path_segment_back_will_be_removed(&sjc);
} else {
return cwk_path_segment_normal_will_be_removed(&sjc);
}
}
static bool
cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined *sj,
bool absolute)
{
while (cwk_path_segment_will_be_removed(sj, absolute)) {
if (!cwk_path_get_next_segment_joined(sj)) {
return false;
}
}
return true;
}
static void cwk_path_get_root_windows(const char *path, size_t *length)
{
const char *c;
bool is_device_path;
// We can not determine the root if this is an empty string. So we set the
// root to NULL and the length to zero and cancel the whole thing.
c = path;
*length = 0;
if (!*c) {
return;
}
// Now we have to verify whether this is a windows network path (UNC), which
// we will consider our root.
if (cwk_path_is_separator(c)) {
++c;
// Check whether the path starts with a single backslash, which means this
// is not a network path - just a normal path starting with a backslash.
if (!cwk_path_is_separator(c)) {
// Okay, this is not a network path but we still use the backslash as a
// root.
++(*length);
return;
}
// A device path is a path which starts with "\\." or "\\?". A device path
// can be a UNC path as well, in which case it will take up one more
// segment. So, this is a network or device path. Skip the previous
// separator. Now we need to determine whether this is a device path. We
// might advance one character here if the server name starts with a '?' or
// a '.', but that's fine since we will search for a separator afterwards
// anyway.
++c;
is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c);
if (is_device_path) {
// That's a device path, and the root must be either "\\.\" or "\\?\"
// which is 4 characters long. (at least that's how Windows
// GetFullPathName behaves.)
*length = 4;
return;
}
// We will grab anything up to the next stop. The next stop might be a '\0'
// or another separator. That will be the server name.
c = cwk_path_find_next_stop(c);
// If this is a separator and not the end of a string we wil have to include
// it. However, if this is a '\0' we must not skip it.
while (cwk_path_is_separator(c)) {
++c;
}
// We are now skipping the shared folder name, which will end after the
// next stop.
c = cwk_path_find_next_stop(c);
// Then there might be a separator at the end. We will include that as well,
// it will mark the path as absolute.
if (cwk_path_is_separator(c)) {
++c;
}
// Finally, calculate the size of the root.
*length = (size_t)(c - path);
return;
}
// Move to the next and check whether this is a colon.
if (*++c == ':') {
*length = 2;
// Now check whether this is a backslash (or slash). If it is not, we could
// assume that the next character is a '\0' if it is a valid path. However,
// we will not assume that - since ':' is not valid in a path it must be a
// mistake by the caller than. We will try to understand it anyway.
if (cwk_path_is_separator(++c)) {
*length = 3;
}
}
}
static void cwk_path_get_root_unix(const char *path, size_t *length)
{
// The slash of the unix path represents the root. There is no root if there
// is no slash.
if (cwk_path_is_separator(path)) {
*length = 1;
} else {
*length = 0;
}
}
static bool cwk_path_is_root_absolute(const char *path, size_t length)
{
// This is definitely not absolute if there is no root.
if (length == 0) {
return false;
}
// If there is a separator at the end of the root, we can safely consider this
// to be an absolute path.
return cwk_path_is_separator(&path[length - 1]);
}
static void cwk_path_fix_root(char *buffer, size_t buffer_size, size_t length)
{
size_t i;
// This only affects windows.
if (path_style != CWK_STYLE_WINDOWS) {
return;
}
// Make sure we are not writing further than we are actually allowed to.
if (length > buffer_size) {
length = buffer_size;
}
// Replace all forward slashes with backwards slashes. Since this is windows
// we can't have any forward slashes in the root.
for (i = 0; i < length; ++i) {
if (cwk_path_is_separator(&buffer[i])) {
buffer[i] = *separators[CWK_STYLE_WINDOWS];
}
}
}
static size_t cwk_path_join_and_normalize_multiple(const char **paths,
char *buffer, size_t buffer_size)
{
size_t pos;
bool absolute, has_segment_output;
struct cwk_segment_joined sj;
// We initialize the position after the root, which should get us started.
cwk_path_get_root(paths[0], &pos);
// Determine whether the path is absolute or not. We need that to determine
// later on whether we can remove superfluous "../" or not.
absolute = cwk_path_is_root_absolute(paths[0], pos);
// First copy the root to the output. After copying, we will normalize the
// root.
cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);
cwk_path_fix_root(buffer, buffer_size, pos);
// So we just grab the first segment. If there is no segment we will always
// output a "/", since we currently only support absolute paths here.
if (!cwk_path_get_first_segment_joined(paths, &sj)) {
goto done;
}
// Let's assume that we don't have any segment output for now. We will toggle
// this flag once there is some output.
has_segment_output = false;
do {
// Check whether we have to drop this segment because of resolving a
// relative path or because it is a CWK_CURRENT segment.
if (cwk_path_segment_will_be_removed(&sj, absolute)) {
continue;
}
// We add a separator if we previously wrote a segment. The last segment
// must not have a trailing separator. This must happen before the segment
// output, since we would override the null terminating character with
// reused buffers if this was done afterwards.
if (has_segment_output) {
pos += cwk_path_output_separator(buffer, buffer_size, pos);
}
// Remember that we have segment output, so we can handle the trailing slash
// later on. This is necessary since we might have segments but they are all
// removed.
has_segment_output = true;
// Write out the segment but keep in mind that we need to follow the
// buffer size limitations. That's why we use the path output functions
// here.
pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin,
sj.segment.size);
} while (cwk_path_get_next_segment_joined(&sj));
// Remove the trailing slash, but only if we have segment output. We don't
// want to remove anything from the root.
if (!has_segment_output && pos == 0) {
// This may happen if the path is absolute and all segments have been
// removed. We can not have an empty output - and empty output means we stay
// in the current directory. So we will output a ".".
assert(absolute == false);
pos += cwk_path_output_current(buffer, buffer_size, pos);
}
// We must append a '\0' in any case, unless the buffer size is zero. If the
// buffer size is zero, which means we can not.
done:
cwk_path_terminate_output(buffer, buffer_size, pos);
// And finally let our caller know about the total size of the normalized
// path.
return pos;
}
size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer,
size_t buffer_size)
{
size_t i;
const char *paths[4];
// The basename should be an absolute path if the caller is using the API
// correctly. However, he might not and in that case we will append a fake
// root at the beginning.
if (cwk_path_is_absolute(base)) {
i = 0;
} else if (path_style == CWK_STYLE_WINDOWS) {
paths[0] = "\\";
i = 1;
} else {
paths[0] = "/";
i = 1;
}
if (cwk_path_is_absolute(path)) {
// If the submitted path is not relative the base path becomes irrelevant.
// We will only normalize the submitted path instead.
paths[i++] = path;
paths[i] = NULL;
} else {
// Otherwise we append the relative path to the base path and normalize it.
// The result will be a new absolute path.
paths[i++] = base;
paths[i++] = path;
paths[i] = NULL;
}
// Finally join everything together and normalize it.
return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj,
struct cwk_segment_joined *osj, bool absolute, bool *base_available,
bool *other_available)
{
// Now looping over all segments until they start to diverge. A path may
// diverge if two segments are not equal or if one path reaches the end.
do {
// Check whether there is anything available after we skip everything which
// is invisible. We do that for both paths, since we want to let the caller
// know which path has some trailing segments after they diverge.
*base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);
*other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);
// We are done if one or both of those paths reached the end. They either
// diverge or both reached the end - but in both cases we can not continue
// here.
if (!*base_available || !*other_available) {
break;
}
// Compare the content of both segments. We are done if they are not equal,
// since they diverge.
if (!cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,
bsj->segment.size, osj->segment.size)) {
break;
}
// We keep going until one of those segments reached the end. The next
// segment might be invisible, but we will check for that in the beginning
// of the loop once again.
*base_available = cwk_path_get_next_segment_joined(bsj);
*other_available = cwk_path_get_next_segment_joined(osj);
} while (*base_available && *other_available);
}
size_t cwk_path_get_relative(const char *base_directory, const char *path,
char *buffer, size_t buffer_size)
{
size_t pos, base_root_length, path_root_length;
bool absolute, base_available, other_available, has_output;
const char *base_paths[2], *other_paths[2];
struct cwk_segment_joined bsj, osj;
pos = 0;
// First we compare the roots of those two paths. If the roots are not equal
// we can't continue, since there is no way to get a relative path from
// different roots.
cwk_path_get_root(base_directory, &base_root_length);
cwk_path_get_root(path, &path_root_length);
if (base_root_length != path_root_length ||
!cwk_path_is_string_equal(base_directory, path, base_root_length,
path_root_length)) {
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// Verify whether this is an absolute path. We need to know that since we can
// remove all back-segments if it is.
absolute = cwk_path_is_root_absolute(base_directory, base_root_length);
// Initialize our joined segments. This will allow us to use the internal
// functions to skip until diverge and invisible. We only have one path in
// them though.
base_paths[0] = base_directory;
base_paths[1] = NULL;
other_paths[0] = path;
other_paths[1] = NULL;
cwk_path_get_first_segment_joined(base_paths, &bsj);
cwk_path_get_first_segment_joined(other_paths, &osj);
// Okay, now we skip until the segments diverge. We don't have anything to do
// with the segments which are equal.
cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,
&other_available);
// Assume there is no output until we have got some. We will need this
// information later on to remove trailing slashes or alternatively output a
// current-segment.
has_output = false;
// So if we still have some segments left in the base path we will now output
// a back segment for all of them.
if (base_available) {
do {
// Skip any invisible segment. We don't care about those and we don't need
// to navigate back because of them.
if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {
break;
}
// Toggle the flag if we have output. We need to remember that, since we
// want to remove the trailing slash.
has_output = true;
// Output the back segment and a separator. No need to worry about the
// superfluous segment since it will be removed later on.
pos += cwk_path_output_back(buffer, buffer_size, pos);
pos += cwk_path_output_separator(buffer, buffer_size, pos);
} while (cwk_path_get_next_segment_joined(&bsj));
}
// And if we have some segments available of the target path we will output
// all of those.
if (other_available) {
do {
// Again, skip any invisible segments since we don't need to navigate into
// them.
if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) {
break;
}
// Toggle the flag if we have output. We need to remember that, since we
// want to remove the trailing slash.
has_output = true;
// Output the current segment and a separator. No need to worry about the
// superfluous segment since it will be removed later on.
pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,
osj.segment.size);
pos += cwk_path_output_separator(buffer, buffer_size, pos);
} while (cwk_path_get_next_segment_joined(&osj));
}
// If we have some output by now we will have to remove the trailing slash. We
// simply do that by moving back one character. The terminate output function
// will then place the '\0' on this position. Otherwise, if there is no
// output, we will have to output a "current directory", since the target path
// points to the base path.
if (has_output) {
--pos;
} else {
pos += cwk_path_output_current(buffer, buffer_size, pos);
}
// Finally, we can terminate the output - which means we place a '\0' at the
// current position or at the end of the buffer.
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer,
size_t buffer_size)
{
const char *paths[3];
// This is simple. We will just create an array with the two paths which we
// wish to join.
paths[0] = path_a;
paths[1] = path_b;
paths[2] = NULL;
// And then call the join and normalize function which will do the hard work
// for us.
return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
size_t cwk_path_join_multiple(const char **paths, char *buffer,
size_t buffer_size)
{
// We can just call the internal join and normalize function for this one,
// since it will handle everything.
return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
void cwk_path_get_root(const char *path, size_t *length)
{
// We use a different implementation here based on the configuration of the
// library.
if (path_style == CWK_STYLE_WINDOWS) {
cwk_path_get_root_windows(path, length);
} else {
cwk_path_get_root_unix(path, length);
}
}
size_t cwk_path_change_root(const char *path, const char *new_root,
char *buffer, size_t buffer_size)
{
const char *tail;
size_t root_length, path_length, tail_length, new_root_length, new_path_size;
// First we need to determine the actual size of the root which we will
// change.
cwk_path_get_root(path, &root_length);
// Now we determine the sizes of the new root and the path. We need that to
// determine the size of the part after the root (the tail).
new_root_length = strlen(new_root);
path_length = strlen(path);
// Okay, now we calculate the position of the tail and the length of it.
tail = path + root_length;
tail_length = path_length - root_length;
// We first output the tail and then the new root, that's because the source
// path and the buffer may be overlapping. This way the root will not
// overwrite the tail.
cwk_path_output_sized(buffer, buffer_size, new_root_length, tail,
tail_length);
cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length);
// Finally we calculate the size o the new path and terminate the output with
// a '\0'.
new_path_size = tail_length + new_root_length;
cwk_path_terminate_output(buffer, buffer_size, new_path_size);
return new_path_size;
}
bool cwk_path_is_absolute(const char *path)
{
size_t length;
// We grab the root of the path. This root does not include the first
// separator of a path.
cwk_path_get_root(path, &length);
// Now we can determine whether the root is absolute or not.
return cwk_path_is_root_absolute(path, length);
}
bool cwk_path_is_relative(const char *path)
{
// The path is relative if it is not absolute.
return !cwk_path_is_absolute(path);
}
void cwk_path_get_basename(const char *path, const char **basename,
size_t *length)
{
struct cwk_segment segment;
// We get the last segment of the path. The last segment will contain the
// basename if there is any. If there are no segments we will set the basename
// to NULL and the length to 0.
if (!cwk_path_get_last_segment(path, &segment)) {
*basename = NULL;
if (length) {
*length = 0;
}
return;
}
// Now we can just output the segment contents, since that's our basename.
// There might be trailing separators after the basename, but the size does
// not include those.
*basename = segment.begin;
if (length) {
*length = segment.size;
}
}
size_t cwk_path_change_basename(const char *path, const char *new_basename,
char *buffer, size_t buffer_size)
{
struct cwk_segment segment;
size_t pos, root_size, new_basename_size;
// First we try to get the last segment. We may only have a root without any
// segments, in which case we will create one.
if (!cwk_path_get_last_segment(path, &segment)) {
// So there is no segment in this path. First we grab the root and output
// that. We are not going to modify the root in any way.
cwk_path_get_root(path, &root_size);
pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
// We have to trim the separators from the beginning of the new basename.
// This is quite easy to do.
while (cwk_path_is_separator(new_basename)) {
++new_basename;
}
// Now we measure the length of the new basename, this is a two step
// process. First we find the '\0' character at the end of the string.
new_basename_size = 0;
while (new_basename[new_basename_size]) {
++new_basename_size;
}
// And then we trim the separators at the end of the basename until we reach
// the first valid character.
while (new_basename_size > 0 &&
cwk_path_is_separator(&new_basename[new_basename_size - 1])) {
--new_basename_size;
}
// Now we will output the new basename after the root.
pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename,
new_basename_size);
// And finally terminate the output and return the total size of the path.
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// If there is a last segment we can just forward this call, which is fairly
// easy.
return cwk_path_change_segment(&segment, new_basename, buffer, buffer_size);
}
void cwk_path_get_dirname(const char *path, size_t *length)
{
struct cwk_segment segment;
// We get the last segment of the path. The last segment will contain the
// basename if there is any. If there are no segments we will set the length
// to 0.
if (!cwk_path_get_last_segment(path, &segment)) {
*length = 0;
return;
}
// We can now return the length from the beginning of the string up to the
// beginning of the last segment.
*length = (size_t)(segment.begin - path);
}
bool cwk_path_get_extension(const char *path, const char **extension,
size_t *length)
{
struct cwk_segment segment;
const char *c;
// We get the last segment of the path. The last segment will contain the
// extension if there is any.
if (!cwk_path_get_last_segment(path, &segment)) {
return false;
}
// Now we search for a dot within the segment. If there is a dot, we consider
// the rest of the segment the extension. We do this from the end towards the
// beginning, since we want to find the last dot.
for (c = segment.end; c >= segment.begin; --c) {
if (*c == '.') {
// Okay, we found an extension. We can stop looking now.
*extension = c;
*length = (size_t)(segment.end - c);
return true;
}
}
// We couldn't find any extension.
return false;
}
bool cwk_path_has_extension(const char *path)
{
const char *extension;
size_t length;
// We just wrap the get_extension call which will then do the work for us.
return cwk_path_get_extension(path, &extension, &length);
}
size_t cwk_path_change_extension(const char *path, const char *new_extension,
char *buffer, size_t buffer_size)
{
struct cwk_segment segment;
const char *c, *old_extension;
size_t pos, root_size, trail_size, new_extension_size;
// First we try to get the last segment. We may only have a root without any
// segments, in which case we will create one.
if (!cwk_path_get_last_segment(path, &segment)) {
// So there is no segment in this path. First we grab the root and output
// that. We are not going to modify the root in any way. If there is no
// root, this will end up with a root size 0, and nothing will be written.
cwk_path_get_root(path, &root_size);
pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
// Add a dot if the submitted value doesn't have any.
if (*new_extension != '.') {
pos += cwk_path_output_dot(buffer, buffer_size, pos);
}
// And finally terminate the output and return the total size of the path.
pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// Now we seek the old extension in the last segment, which we will replace
// with the new one. If there is no old extension, it will point to the end of
// the segment.
old_extension = segment.end;
for (c = segment.begin; c < segment.end; ++c) {
if (*c == '.') {
old_extension = c;
}
}
pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path,
(size_t)(old_extension - segment.path));
// If the new extension starts with a dot, we will skip that dot. We always
// output exactly one dot before the extension. If the extension contains
// multiple dots, we will output those as part of the extension.
if (*new_extension == '.') {
++new_extension;
}
// We calculate the size of the new extension, including the dot, in order to
// output the trail - which is any part of the path coming after the
// extension. We must output this first, since the buffer may overlap with the
// submitted path - and it would be overridden by longer extensions.
new_extension_size = strlen(new_extension) + 1;
trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size,
segment.end);
// Finally we output the dot and the new extension. The new extension itself
// doesn't contain the dot anymore, so we must output that first.
pos += cwk_path_output_dot(buffer, buffer_size, pos);
pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
// Now we terminate the output with a null-terminating character, but before
// we do that we must add the size of the trail to the position which we
// output before.
pos += trail_size;
cwk_path_terminate_output(buffer, buffer_size, pos);
// And the position is our output size now.
return pos;
}
size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size)
{
const char *paths[2];
// Now we initialize the paths which we will normalize. Since this function
// only supports submitting a single path, we will only add that one.
paths[0] = path;
paths[1] = NULL;
return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
size_t cwk_path_get_intersection(const char *path_base, const char *path_other)
{
bool absolute;
size_t base_root_length, other_root_length;
const char *end;
const char *paths_base[2], *paths_other[2];
struct cwk_segment_joined base, other;
// We first compare the two roots. We just return zero if they are not equal.
// This will also happen to return zero if the paths are mixed relative and
// absolute.
cwk_path_get_root(path_base, &base_root_length);
cwk_path_get_root(path_other, &other_root_length);
if (!cwk_path_is_string_equal(path_base, path_other, base_root_length,
other_root_length)) {
return 0;
}
// Configure our paths. We just have a single path in here for now.
paths_base[0] = path_base;
paths_base[1] = NULL;
paths_other[0] = path_other;
paths_other[1] = NULL;
// So we get the first segment of both paths. If one of those paths don't have
// any segment, we will return 0.
if (!cwk_path_get_first_segment_joined(paths_base, &base) ||
!cwk_path_get_first_segment_joined(paths_other, &other)) {
return base_root_length;
}
// We now determine whether the path is absolute or not. This is required
// because if will ignore removed segments, and this behaves differently if
// the path is absolute. However, we only need to check the base path because
// we are guaranteed that both paths are either relative or absolute.
absolute = cwk_path_is_root_absolute(path_base, base_root_length);
// We must keep track of the end of the previous segment. Initially, this is
// set to the beginning of the path. This means that 0 is returned if the
// first segment is not equal.
end = path_base + base_root_length;
// Now we loop over both segments until one of them reaches the end or their
// contents are not equal.
do {
// We skip all segments which will be removed in each path, since we want to
// know about the true path.
if (!cwk_path_segment_joined_skip_invisible(&base, absolute) ||
!cwk_path_segment_joined_skip_invisible(&other, absolute)) {
break;
}
if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin,
base.segment.size, other.segment.size)) {
// So the content of those two segments are not equal. We will return the
// size up to the beginning.
return (size_t)(end - path_base);
}
// Remember the end of the previous segment before we go to the next one.
end = base.segment.end;
} while (cwk_path_get_next_segment_joined(&base) &&
cwk_path_get_next_segment_joined(&other));
// Now we calculate the length up to the last point where our paths pointed to
// the same place.
return (size_t)(end - path_base);
}
bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment)
{
size_t length;
const char *segments;
// We skip the root since that's not part of the first segment. The root is
// treated as a separate entity.
cwk_path_get_root(path, &length);
segments = path + length;
// Now, after we skipped the root we can continue and find the actual segment
// content.
return cwk_path_get_first_segment_without_root(path, segments, segment);
}
bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment)
{
// We first grab the first segment. This might be our last segment as well,
// but we don't know yet. There is no last segment if there is no first
// segment, so we return false in that case.
if (!cwk_path_get_first_segment(path, segment)) {
return false;
}
// Now we find our last segment. The segment struct of the caller
// will contain the last segment, since the function we call here will not
// change the segment struct when it reaches the end.
while (cwk_path_get_next_segment(segment)) {
// We just loop until there is no other segment left.
}
return true;
}
bool cwk_path_get_next_segment(struct cwk_segment *segment)
{
const char *c;
// First we jump to the end of the previous segment. The first character must
// be either a '\0' or a separator.
c = segment->begin + segment->size;
if (*c == '\0') {
return false;
}
// Now we skip all separator until we reach something else. We are not yet
// guaranteed to have a segment, since the string could just end afterwards.
assert(cwk_path_is_separator(c));
do {
++c;
} while (cwk_path_is_separator(c));
// If the string ends here, we can safely assume that there is no other
// segment after this one.
if (*c == '\0') {
return false;
}
// Now we are safe to assume there is a segment. We store the beginning of
// this segment in the segment struct of the caller.
segment->begin = c;
// And now determine the size of this segment, and store it in the struct of
// the caller as well.
c = cwk_path_find_next_stop(c);
segment->end = c;
segment->size = (size_t)(c - segment->begin);
// Tell the caller that we found a segment.
return true;
}
bool cwk_path_get_previous_segment(struct cwk_segment *segment)
{
const char *c;
// The current position might point to the first character of the path, which
// means there are no previous segments available.
c = segment->begin;
if (c <= segment->segments) {
return false;
}
// We move towards the beginning of the path until we either reached the
// beginning or the character is no separator anymore.
do {
--c;
if (c < segment->segments) {
// So we reached the beginning here and there is no segment. So we return
// false and don't change the segment structure submitted by the caller.
return false;
}
} while (cwk_path_is_separator(c));
// We are guaranteed now that there is another segment, since we moved before
// the previous separator and did not reach the segment path beginning.
segment->end = c + 1;
segment->begin = cwk_path_find_previous_stop(segment->segments, c);
segment->size = (size_t)(segment->end - segment->begin);
return true;
}
enum cwk_segment_type cwk_path_get_segment_type(
const struct cwk_segment *segment)
{
// We just make a string comparison with the segment contents and return the
// appropriate type.
if (strncmp(segment->begin, ".", segment->size) == 0) {
return CWK_CURRENT;
} else if (strncmp(segment->begin, "..", segment->size) == 0) {
return CWK_BACK;
}
return CWK_NORMAL;
}
bool cwk_path_is_separator(const char *str)
{
const char *c;
// We loop over all characters in the read symbols.
c = separators[path_style];
while (*c) {
if (*c == *str) {
return true;
}
++c;
}
return false;
}
size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value,
char *buffer, size_t buffer_size)
{
size_t pos, value_size, tail_size;
// First we have to output the head, which is the whole string up to the
// beginning of the segment. This part of the path will just stay the same.
pos = cwk_path_output_sized(buffer, buffer_size, 0, segment->path,
(size_t)(segment->begin - segment->path));
// In order to trip the submitted value, we will skip any separator at the
// beginning of it and behave as if it was never there.
while (cwk_path_is_separator(value)) {
++value;
}
// Now we determine the length of the value. In order to do that we first
// locate the '\0'.
value_size = 0;
while (value[value_size]) {
++value_size;
}
// Since we trim separators at the beginning and in the end of the value we
// have to subtract from the size until there are either no more characters
// left or the last character is no separator.
while (value_size > 0 && cwk_path_is_separator(&value[value_size - 1])) {
--value_size;
}
// We also have to determine the tail size, which is the part of the string
// following the current segment. This part will not change.
tail_size = strlen(segment->end);
// Now we output the tail. We have to do that, because if the buffer and the
// source are overlapping we would override the tail if the value is
// increasing in length.
cwk_path_output_sized(buffer, buffer_size, pos + value_size, segment->end,
tail_size);
// Finally we can output the value in the middle of the head and the tail,
// where we have enough space to fit the whole trimmed value.
pos += cwk_path_output_sized(buffer, buffer_size, pos, value, value_size);
// Now we add the tail size to the current position and terminate the output -
// basically, ensure that there is a '\0' at the end of the buffer.
pos += tail_size;
cwk_path_terminate_output(buffer, buffer_size, pos);
// And now tell the caller how long the whole path would be.
return pos;
}
enum cwk_path_style cwk_path_guess_style(const char *path)
{
const char *c;
size_t root_length;
struct cwk_segment segment;
// First we determine the root. Only windows roots can be longer than a single
// slash, so if we can determine that it starts with something like "C:", we
// know that this is a windows path.
cwk_path_get_root_windows(path, &root_length);
if (root_length > 1) {
return CWK_STYLE_WINDOWS;
}
// Next we check for slashes. Windows uses backslashes, while unix uses
// forward slashes. Windows actually supports both, but our best guess is to
// assume windows with backslashes and unix with forward slashes.
for (c = path; *c; ++c) {
if (*c == *separators[CWK_STYLE_UNIX]) {
return CWK_STYLE_UNIX;
} else if (*c == *separators[CWK_STYLE_WINDOWS]) {
return CWK_STYLE_WINDOWS;
}
}
// This path does not have any slashes. We grab the last segment (which
// actually must be the first one), and determine whether the segment starts
// with a dot. A dot is a hidden folder or file in the UNIX world, in that
// case we assume the path to have UNIX style.
if (!cwk_path_get_last_segment(path, &segment)) {
// We couldn't find any segments, so we default to a UNIX path style since
// there is no way to make any assumptions.
return CWK_STYLE_UNIX;
}
if (*segment.begin == '.') {
return CWK_STYLE_UNIX;
}
// And finally we check whether the last segment contains a dot. If it
// contains a dot, that might be an extension. Windows is more likely to have
// file names with extensions, so our guess would be windows.
for (c = segment.begin; *c; ++c) {
if (*c == '.') {
return CWK_STYLE_WINDOWS;
}
}
// All our checks failed, so we will return a default value which is currently
// UNIX.
return CWK_STYLE_UNIX;
}
void cwk_path_set_style(enum cwk_path_style style)
{
// We can just set the global path style variable and then the behaviour for
// all functions will change accordingly.
assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS);
path_style = style;
}
enum cwk_path_style cwk_path_get_style(void)
{
// Simply return the path style which we store in a global variable.
return path_style;
}