-
Notifications
You must be signed in to change notification settings - Fork 1
/
tokenize.c
95 lines (85 loc) · 2.9 KB
/
tokenize.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// adapted from https://stackoverflow.com/a/69808797/10773089

// Working state for shlex_split: the array of finished tokens plus the
// token that is currently being accumulated.
struct shlex_state {
    char** tokens;     // finished tokens, each a separately allocated string
    size_t count;      // number of finished tokens
    size_t capacity;   // allocated slots in `tokens`
    char* token;       // bytes of the in-progress token (NULL until first byte)
    size_t token_len;  // bytes stored in `token`
    size_t token_cap;  // allocated bytes in `token`
    int active;        // non-zero once the current token exists, even if empty
};

// Appends one byte to the in-progress token, always leaving room for the
// terminating NUL. Returns 0 on success, -1 if the buffer could not grow
// (the existing buffer is left intact in that case).
static int shlex_put(struct shlex_state* st, char c) {
    if (st->token_len + 1 >= st->token_cap) {
        if (st->token_cap > (size_t)-1 / 2) {
            return -1;
        }
        size_t new_cap = (st->token_cap == 0) ? 16 : st->token_cap * 2;
        char* grown = realloc(st->token, new_cap);
        if (grown == NULL) {
            return -1;
        }
        st->token = grown;
        st->token_cap = new_cap;
    }
    st->token[st->token_len++] = c;
    st->active = 1;
    return 0;
}

// Ensures the token array has room for one more entry plus the NULL
// sentinel. Returns 0 on success, -1 on allocation failure.
static int shlex_reserve(struct shlex_state* st) {
    if (st->count + 1 < st->capacity) {
        return 0;
    }
    if (st->capacity > (size_t)-1 / 2) {
        return -1;
    }
    size_t new_cap = (st->capacity == 0) ? 16 : st->capacity * 2;
    if (new_cap > (size_t)-1 / sizeof *st->tokens) {
        return -1;
    }
    char** grown = realloc(st->tokens, new_cap * sizeof *st->tokens);
    if (grown == NULL) {
        return -1;
    }
    st->tokens = grown;
    st->capacity = new_cap;
    return 0;
}

// Terminates the in-progress token, moves it into the token array and
// resets the accumulator. Returns 0 on success, -1 on allocation failure
// (the in-progress token stays owned by `st` so it can still be freed).
static int shlex_flush(struct shlex_state* st) {
    if (st->token == NULL) {
        // Empty token such as "": no byte was ever buffered, so allocate
        // just enough for the terminator.
        st->token = malloc(1);
        if (st->token == NULL) {
            return -1;
        }
    }
    st->token[st->token_len] = '\0';
    if (shlex_reserve(st) != 0) {
        return -1;
    }
    st->tokens[st->count++] = st->token;
    st->token = NULL;
    st->token_len = 0;
    st->token_cap = 0;
    st->active = 0;
    return 0;
}

// Releases everything held by `st`: finished tokens, the array itself and
// any partially built token.
static void shlex_discard(struct shlex_state* st) {
    for (size_t i = 0; i < st->count; i++) {
        free(st->tokens[i]);
    }
    free(st->tokens);
    free(st->token);
    st->tokens = NULL;
    st->token = NULL;
    st->count = 0;
}

// Splits `s` into whitespace-separated words with shell-like quoting.
//
// Rules:
//   - Single or double quotes group characters (including whitespace) into
//     the current word; the quote characters themselves are dropped.
//   - A backslash escapes the next character. Inside quotes the backslash
//     is kept unless it precedes a backslash or the active quote character.
//   - Adjacent quoted and unquoted pieces form one word, so `""abc` is the
//     single word `abc` and `""` alone is one empty word.
//   - An unterminated quote or a trailing backslash is accepted silently:
//     whatever was collected so far becomes the last word.
//
// Parameters:
//   s          - NUL-terminated input; NULL is rejected.
//   num_tokens - receives the number of words; may be NULL if the caller
//                only needs the NULL-terminated array. Set to 0 on failure.
//
// Returns a malloc'd, NULL-terminated array of malloc'd strings; the caller
// frees each string and then the array. Returns NULL if `s` is NULL or an
// allocation fails, in which case nothing is leaked.
char** shlex_split(const char* s, int* num_tokens) {
    struct shlex_state st = {0};
    char quote = '\0';
    int escape = 0;

    if (num_tokens != NULL) {
        *num_tokens = 0;
    }
    if (s == NULL) {
        return NULL;
    }

    for (const char* p = s; *p != '\0'; p++) {
        char c = *p;
        if (escape) {
            escape = 0;
            // Inside quotes only `\\` and an escaped quote consume the
            // backslash; any other pair keeps it literally.
            if (quote && c != '\\' && c != quote) {
                if (shlex_put(&st, '\\') != 0) {
                    goto fail;
                }
            }
            if (shlex_put(&st, c) != 0) {
                goto fail;
            }
        } else if (c == '\\') {
            escape = 1;
        } else if (!quote && (c == '\'' || c == '"')) {
            quote = c;
        } else if (quote && c == quote) {
            quote = '\0';
            // A closed quote makes the word exist even when it is empty;
            // it is emitted at the next separator, not here, so text that
            // follows directly still joins the same word.
            st.active = 1;
        } else if (quote || !isspace((unsigned char)c)) {
            // The cast keeps isspace defined for bytes >= 0x80 when plain
            // char is signed.
            if (shlex_put(&st, c) != 0) {
                goto fail;
            }
        } else if (st.active) {
            if (shlex_flush(&st) != 0) {
                goto fail;
            }
        }
    }

    if (st.active && shlex_flush(&st) != 0) {
        goto fail;
    }
    // Also covers the zero-token case, where no array exists yet.
    if (shlex_reserve(&st) != 0) {
        goto fail;
    }
    st.tokens[st.count] = NULL;
    if (num_tokens != NULL) {
        *num_tokens = (int)st.count;
    }
    return st.tokens;

fail:
    shlex_discard(&st);
    return NULL;
}
// Tokenizes `str` by delegating to shlex_split.
//
// The array returned by shlex_split is stored in `*argv`, and `argc` is
// passed through so shlex_split writes the token count into it.
//
// Returns 0 when shlex_split produced a non-NULL array. Otherwise prints
// an error message to stderr and returns -1 (`*argv` is NULL in that case).
int tokenize(const char* str, int* argc, char*** argv) {
    char** tokens = shlex_split(str, argc);

    *argv = tokens;
    if (tokens != NULL) {
        return 0;
    }

    fprintf(stderr, "Error: Memory allocation failed.\n");
    return -1;
}