© 2026 Hedgehog Software, LLC

TwitterGitHubDiscord
More
CommunitiesDocsAboutTermsPrivacy
Search
Star
Setup for Free
C#C
C#•4y ago•
17 replies
ero

Optimizing some string manipulation

I want to both take the substring of an input string after the last occurrence of `/` and normalize it so that it contains only alphanumeric (`a-z`, `A-Z`, `0-9`) characters. Normalizing here means that characters with diacritics turn into their non-diacritic versions (`ä` -> `a`); any character that cannot be normalized this way is replaced with `_`.

Here's what I've got so far:
// Builds an identifier-like name from the part of `input` that follows its
// last '/'. The text is decomposed with NormalizeString (normalization form 2,
// NormalizationD in the Win32 NORM_FORM enum), non-spacing marks are dropped,
// ASCII letters and digits are kept, and every other character becomes '_'.
// A result that would start with a digit gets a leading '_'.
// Returns "" for empty input, for input that ends in '/', and when
// NormalizeString reports failure.
if (input.Length == 0)
{
  return "";
}

// Capacity handed to NormalizeString, in UTF-16 code units.
const int NormCapacity = 128;

// One slot more than the normalized text can occupy, so the optional leading
// '_' still fits when every one of the NormCapacity characters is kept.
Span<char> outBuf = stackalloc char[NormCapacity + 1];
char* pNorm = stackalloc char[NormCapacity];

fixed (char* pIn = input, pOut = outBuf)
{
  int dLength = NormalizeString(2, (ushort*)pIn, input.Length, (ushort*)pNorm, NormCapacity);

  // NormalizeString returns a value <= 0 when it fails (for example when the
  // decomposed text does not fit in the destination buffer).
  // NOTE(review): such input yields "" rather than an error; consider a
  // managed fallback (string.Normalize) if long inputs are expected.
  if (dLength <= 0)
  {
    return "";
  }

  // The output is filled back to front. `start` is always the index of the
  // first character written so far (outBuf.Length means "nothing written").
  int start = outBuf.Length;
  char first = default;

  for (int i = dLength - 1; i >= 0; i--)
  {
    char c = pNorm[i];

    // Combining marks left behind by the decomposition are simply dropped,
    // which is what turns "ä" into "a".
    if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.NonSpacingMark)
    {
      continue;
    }

    // Scanning backwards, the first '/' seen is the last one in the text.
    if (c is '/')
    {
      break;
    }

    first = c switch
    {
      (>= '0' and <= '9') or (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') => c,
      _ => '_'
    };

    // Pre-decrement keeps `start` on the character just written, so the
    // final slice begins exactly at the first kept character.
    pOut[--start] = first;
  }

  // `first` holds the leftmost kept character; prefix '_' when it is a digit.
  if (first is >= '0' and <= '9')
  {
    pOut[--start] = '_';
  }

  return outBuf.Slice(start).ToString();
}

// P/Invoke declaration for the NormalizeString export of the "normaliz" library.
//   normForm          - normalization form selector; the visible caller passes 2
//                       (presumably NormalizationD in the Win32 NORM_FORM enum - confirm).
//   source            - pointer to the UTF-16 code units to normalize.
//   sourceLength      - number of code units to read from `source`.
//   destination       - buffer that receives the normalized text.
//   destinationLength - capacity of `destination`, in code units.
// Returns the native function's int result.
// NOTE(review): per the Win32 documentation a value <= 0 signals failure;
// callers should check it before using `destination`.
[DllImport("normaliz")]
static extern int NormalizeString(
  int normForm,
  ushort* source,
  int sourceLength,
  ushort* destination,
  int destinationLength);
// Builds an identifier-like name from the part of `input` that follows its
// last '/'. The text is decomposed with NormalizeString (normalization form 2,
// NormalizationD in the Win32 NORM_FORM enum), non-spacing marks are dropped,
// ASCII letters and digits are kept, and every other character becomes '_'.
// A result that would start with a digit gets a leading '_'.
// Returns "" for empty input, for input that ends in '/', and when
// NormalizeString reports failure.
if (input.Length == 0)
{
  return "";
}

// Capacity handed to NormalizeString, in UTF-16 code units.
const int NormCapacity = 128;

// One slot more than the normalized text can occupy, so the optional leading
// '_' still fits when every one of the NormCapacity characters is kept.
Span<char> outBuf = stackalloc char[NormCapacity + 1];
char* pNorm = stackalloc char[NormCapacity];

fixed (char* pIn = input, pOut = outBuf)
{
  int dLength = NormalizeString(2, (ushort*)pIn, input.Length, (ushort*)pNorm, NormCapacity);

  // NormalizeString returns a value <= 0 when it fails (for example when the
  // decomposed text does not fit in the destination buffer).
  // NOTE(review): such input yields "" rather than an error; consider a
  // managed fallback (string.Normalize) if long inputs are expected.
  if (dLength <= 0)
  {
    return "";
  }

  // The output is filled back to front. `start` is always the index of the
  // first character written so far (outBuf.Length means "nothing written").
  int start = outBuf.Length;
  char first = default;

  for (int i = dLength - 1; i >= 0; i--)
  {
    char c = pNorm[i];

    // Combining marks left behind by the decomposition are simply dropped,
    // which is what turns "ä" into "a".
    if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.NonSpacingMark)
    {
      continue;
    }

    // Scanning backwards, the first '/' seen is the last one in the text.
    if (c is '/')
    {
      break;
    }

    first = c switch
    {
      (>= '0' and <= '9') or (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') => c,
      _ => '_'
    };

    // Pre-decrement keeps `start` on the character just written, so the
    // final slice begins exactly at the first kept character.
    pOut[--start] = first;
  }

  // `first` holds the leftmost kept character; prefix '_' when it is a digit.
  if (first is >= '0' and <= '9')
  {
    pOut[--start] = '_';
  }

  return outBuf.Slice(start).ToString();
}

// P/Invoke declaration for the NormalizeString export of the "normaliz" library.
//   normForm          - normalization form selector; the visible caller passes 2
//                       (presumably NormalizationD in the Win32 NORM_FORM enum - confirm).
//   source            - pointer to the UTF-16 code units to normalize.
//   sourceLength      - number of code units to read from `source`.
//   destination       - buffer that receives the normalized text.
//   destinationLength - capacity of `destination`, in code units.
// Returns the native function's int result.
// NOTE(review): per the Win32 documentation a value <= 0 signals failure;
// callers should check it before using `destination`.
[DllImport("normaliz")]
static extern int NormalizeString(
  int normForm,
  ushort* source,
  int sourceLength,
  ushort* destination,
  int destinationLength);

However, this is hardly faster than using `Substring` and `Normalize` (with some custom code involving `CharUnicodeInfo.GetUnicodeCategory`).

Any ideas?
C# banner
C#Join
We are a programming server aimed at coders discussing everything related to C# (CSharp) and .NET.
61,871Members
Resources

Similar Threads

Was this page helpful?
Recent Announcements

Similar Threads

String manipulation etc
C#CC# / help
3mo ago
Write a String manipulation program [Answered]
C#CC# / help
4y ago
❔ DBSet manipulation
C#CC# / help
3y ago
Text Manipulation
C#CC# / help
4y ago