Click to See Complete Forum and Search --> : Perl string functions in C


debiandude
09-08-2001, 04:44 PM
I thought it would be cool to implement a few Perl functions in C. I've tried this a few times before and was unsuccessful. Anyway, today I have split, join, chop, and chomp done. They are minimal, so they might not do everything their Perl counterparts do. The next one I want to do is substitute (=~ s/// in Perl), but my brain is failing me on ways to replace the strings. If anyone has any ideas, post them. In the meantime, here is what I have started; corrections and contributions are of course appreciated:

#include <stdio.h>
#include <string.h>

#define MAXELEMENTS 24
#define MAXSTRING 1024

void chop(char *src);
void chomp(char *src);
char *join(char *glue, const char **list);
char **split(char *delim, char *str);
char *subsitute(char *src, char *pattern);

/*
 * split - break a string into tokens, loosely like Perl's split().
 *
 * delim: set of delimiter characters (strtok semantics: any character in
 *        the set separates tokens and runs of delimiters are collapsed).
 * str:   string to tokenize. It is MODIFIED in place (strtok overwrites
 *        delimiters with '\0'), so it must not be a string literal.
 *
 * Returns a NULL-terminated array of pointers into str. The array is
 * static storage: the next call to split() overwrites it and it must not
 * be freed. At most MAXELEMENTS-1 tokens are returned; further tokens are
 * dropped so the terminating NULL always fits.
 */
char **split(char *delim, char *str) {

    static char *list[MAXELEMENTS];
    char *token;
    int i = 0;

    token = (str != NULL) ? strtok(str, delim) : NULL;

    /* Keep the last slot free for the NULL terminator. */
    while (token != NULL && i < MAXELEMENTS - 1) {
        list[i++] = token;
        token = strtok(NULL, delim);
    }

    /* Slots past i may still hold pointers from an earlier call. */
    list[i] = NULL;

    return list;

}

/*
 * join - concatenate the strings of a NULL-terminated list, placing glue
 * between consecutive elements (like Perl's join()).
 *
 * glue: separator inserted between elements; may be any length, including
 *       empty.
 * list: NULL-terminated array of strings.
 *
 * Returns a pointer to a static buffer of MAXSTRING bytes that is reset on
 * every call, so the previous result is invalidated. If the next element
 * would not fit, joining stops there and the (shorter) result is returned.
 */
char *join(char *glue, const char **list) {

    static char joined[MAXSTRING];
    size_t used = 0;
    size_t glue_len = strlen(glue);
    int first = 1;

    joined[0] = '\0';

    for (; list != NULL && *list != NULL; list++) {
        size_t item_len = strlen(*list);
        size_t need = item_len + (first ? 0 : glue_len);

        /* Stop before overflowing; one byte is reserved for the '\0'. */
        if (need >= sizeof joined - used) {
            break;
        }

        if (!first) {
            memcpy(joined + used, glue, glue_len);
            used += glue_len;
        }
        memcpy(joined + used, *list, item_len);
        used += item_len;
        joined[used] = '\0';
        first = 0;
    }

    return joined;

}

/*
 * chop - remove the last character of src in place (like Perl's chop()).
 * A NULL pointer or an empty string is left untouched.
 */
void chop(char *src) {

    size_t len;

    if (src == NULL) {
        return;
    }

    len = strlen(src);
    if (len > 0) {          /* strlen("") - 1 would wrap around */
        src[len - 1] = '\0';
    }

}

/*
 * chomp - remove one trailing newline from src in place, if present
 * (like Perl's chomp() with the default separator).
 * A NULL pointer or an empty string is left untouched.
 */
void chomp(char *src) {

    size_t len;

    if (src == NULL) {
        return;
    }

    len = strlen(src);
    if (len > 0 && src[len - 1] == '\n') {   /* guard: len - 1 wraps when len == 0 */
        src[len - 1] = '\0';
    }

}

debiandude
09-08-2001, 08:11 PM
OK, I added a minimalistic substitute. Here it is, now complete with a test too, so you can see what I am talking about:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXELEMENTS 24
#define MAXSTRING 1024

void chop(char *src);
void chomp(char *src);
char *join(char *glue, char **list);
char **split(char *delim, char *str);
char *subsitute(char *src, char *pattern);

/*
 * Small demonstration driver: exercises split/join, subsitute, chop and
 * chomp, printing each string before and after the operation.
 */
int main(void) {

    char *string = "This is the originial string";
    char *replaced;
    char test1[] = "A|B|C|D|E|F|G", *test2, **list;
    char test3[] = "This is the string to be chopped";
    char test4[] = "This string has a newline at the end ";

    /* Turn the trailing space into the newline that chomp() will strip. */
    test4[strlen(test4)-1] = '\n';

    puts(test1);
    list = split("|", test1);
    test2 = join(":", list);
    puts(test2);

    puts(string);
    replaced = subsitute(string, "/originial/replaced/");
    puts(replaced);
    /* subsitute() hands back its input on failure; only a fresh buffer is ours to free. */
    if (replaced != string) {
        free(replaced);
    }

    puts(test3);
    chop(test3);
    puts(test3);

    puts(test4);
    chomp(test4);
    puts(test4);

    return 0;

}

/* Find the first occurrence in hay of the n-byte sequence at needle (n > 0,
 * no '\0' inside the sequence). Returns NULL when there is none. */
static const char *find_span(const char *hay, const char *needle, size_t n) {

    for (; (hay = strchr(hay, needle[0])) != NULL; hay++) {
        if (strncmp(hay, needle, n) == 0) {
            return hay;
        }
    }
    return NULL;

}

/*
 * subsitute - minimal literal version of Perl's s/from/with/g.
 *
 * src:     input string; never modified.
 * pattern: "/from/with/" (the leading and trailing '/' are optional).
 *          Both parts are literal text; an empty "with" deletes matches.
 *
 * Returns a newly malloc'ed string with every occurrence of "from"
 * replaced by "with"; the caller must free() it. On failure (NULL
 * argument, pattern without a "from" part or without a separator, size
 * overflow, or out of memory) src itself is returned, so callers should
 * free the result only when it differs from src.
 *
 * The pattern is parsed in place, so neither pattern nor any list
 * previously returned by split() is disturbed.
 */
char *subsitute(char *src, char *pattern) {

    const char *from, *from_end, *with, *with_end, *scan, *hit;
    size_t from_len, with_len, src_len, out_len, count = 0;
    char *replaced, *out;

    if (src == NULL || pattern == NULL) {
        return src;
    }

    from = (*pattern == '/') ? pattern + 1 : pattern;
    from_end = strchr(from, '/');
    if (from_end == NULL || from_end == from) {
        return src;                     /* no separator, or empty "from" */
    }
    from_len = (size_t)(from_end - from);

    with = from_end + 1;
    with_end = strchr(with, '/');
    with_len = (with_end != NULL) ? (size_t)(with_end - with) : strlen(with);

    /* Pass 1: count matches so the output can be sized exactly. */
    for (scan = src; (hit = find_span(scan, from, from_len)) != NULL; scan = hit + from_len) {
        count++;
    }

    src_len = strlen(src);
    if (with_len >= from_len) {
        size_t growth = with_len - from_len;
        if (growth != 0 && count > ((size_t)-1 - src_len - 1) / growth) {
            return src;                 /* result size would overflow size_t */
        }
        out_len = src_len + count * growth;
    } else {
        out_len = src_len - count * (from_len - with_len);
    }

    replaced = malloc(out_len + 1);
    if (replaced == NULL) {
        return src;
    }

    /* Pass 2: copy the text between matches, then the replacement. */
    out = replaced;
    for (scan = src; (hit = find_span(scan, from, from_len)) != NULL; scan = hit + from_len) {
        size_t gap = (size_t)(hit - scan);
        memcpy(out, scan, gap);
        out += gap;
        memcpy(out, with, with_len);
        out += with_len;
    }
    strcpy(out, scan);                  /* tail after the last match */

    return replaced;

}

/*
 * split - break a string into tokens, loosely like Perl's split().
 *
 * delim: set of delimiter characters (strtok semantics: any character in
 *        the set separates tokens and runs of delimiters are collapsed).
 * str:   string to tokenize. It is MODIFIED in place (strtok overwrites
 *        delimiters with '\0'), so it must not be a string literal.
 *
 * Returns a NULL-terminated array of pointers into str. The array is
 * static storage: the next call to split() overwrites it and it must not
 * be freed. At most MAXELEMENTS-1 tokens are returned; further tokens are
 * dropped so the terminating NULL always fits.
 */
char **split(char *delim, char *str) {

    static char *list[MAXELEMENTS];
    char *token;
    int i = 0;

    token = (str != NULL) ? strtok(str, delim) : NULL;

    /* Keep the last slot free for the NULL terminator. */
    while (token != NULL && i < MAXELEMENTS - 1) {
        list[i++] = token;
        token = strtok(NULL, delim);
    }

    /* Slots past i may still hold pointers from an earlier call. */
    list[i] = NULL;

    return list;

}

/*
 * join - concatenate the strings of a NULL-terminated list, placing glue
 * between consecutive elements (like Perl's join()).
 *
 * glue: separator inserted between elements; may be any length, including
 *       empty.
 * list: NULL-terminated array of strings, e.g. the result of split().
 *
 * Returns a pointer to a static buffer of MAXSTRING bytes that is reset on
 * every call, so the previous result is invalidated. If the next element
 * would not fit, joining stops there and the (shorter) result is returned.
 */
char *join(char *glue, char **list) {

    static char joined[MAXSTRING];
    size_t used = 0;
    size_t glue_len = strlen(glue);
    int first = 1;

    joined[0] = '\0';

    for (; list != NULL && *list != NULL; list++) {
        size_t item_len = strlen(*list);
        size_t need = item_len + (first ? 0 : glue_len);

        /* Stop before overflowing; one byte is reserved for the '\0'. */
        if (need >= sizeof joined - used) {
            break;
        }

        if (!first) {
            memcpy(joined + used, glue, glue_len);
            used += glue_len;
        }
        memcpy(joined + used, *list, item_len);
        used += item_len;
        joined[used] = '\0';
        first = 0;
    }

    return joined;

}

/*
 * chop - remove the last character of src in place (like Perl's chop()).
 * A NULL pointer or an empty string is left untouched.
 */
void chop(char *src) {

    size_t len;

    if (src == NULL) {
        return;
    }

    len = strlen(src);
    if (len > 0) {          /* strlen("") - 1 would wrap around */
        src[len - 1] = '\0';
    }

}

/*
 * chomp - remove one trailing newline from src in place, if present
 * (like Perl's chomp() with the default separator).
 * A NULL pointer or an empty string is left untouched.
 */
void chomp(char *src) {

    size_t len;

    if (src == NULL) {
        return;
    }

    len = strlen(src);
    if (len > 0 && src[len - 1] == '\n') {   /* guard: len - 1 wraps when len == 0 */
        src[len - 1] = '\0';
    }

}

TheLinuxDuck
09-08-2001, 11:43 PM
It's easier than you think to completely malloc a 2d array. First, treat the 2d pointer as such:

char **list;



Then, malloc the base count:

count=25;
// Don't forget the *4.. remember that a pointer is 4 bytes
list=(char **)malloc(count*4);
if(list==NULL) {
perror("Couldn't malloc");
return 1;
}



Then, allocate each item in the list, as if it was created by a fixed size:

for(i=0;i<count;++i) {
list[i]=(char *)malloc(1000);
if(list[i]==NULL) {
printf("Couldn't alloc for item number %d\n",i);
}
}

But, when you finish up, don't forget to first deallocate each item (unless it is NULL), and deallocate the main item:

for(i=0;i<count;++i) {
if(list[i]!=NULL) free(list[i]);
}


free(list);



And, that's all there is to it! (^=


Btw, duplicating the s/// operator is going to be a real challenge.. I've given it extensive thought, and am afraid to even begin such a thing.. (^=

[ 08 September 2001: Message edited by: TheLinuxDuck ]

debiandude
09-09-2001, 12:08 AM
Well, the implementation doesn't have to be complete, and personally I don't think I would be able to make it complete, because there are some options of s/// that I don't even know how to use. I just wanted to do a few basic ones, because there have been quite a few times when I have said to myself, "geez, I wish I could do =~ s/(\w+)/<$1>/g;" and maybe if I get around to doing all that stuff I won't have to dream. And don't tell me to use the Perl regular expression library for C; how would I learn if I just did that? :-)

[ 09 September 2001: Message edited by: debiandude ]

TheLinuxDuck
09-09-2001, 12:18 AM
Originally posted by debiandude:
<STRONG>Well the implementation dosn't have to be copmlete, and personally I don't think I proabably would be able to make it complete because they're some option of s/// that I don't even know how to use. I just wanted to do a few basic ones because their have been quite a few times when I have said to myself, geeze it wish I could do =~ s/(\w+)/&lt;$1&gt;/g; and maybe if I get around do all that stuff I won't have to dream.. </STRONG>

I'm totally with you.. I'd love to be able to do some of the s/// stuff in C, with some ease.. I don't know diddly about the existing regexp stuff.. all I know is it's not easy to use.. I've not been able to find any examples of using it, let alone being able to use it.. (^=

Maybe you and I could put our heads together and build up a C version that compares to the perl one.. I don't know how close we could get, but man that would be awesome to try! (^=

<STRONG>
And don't tell me to use the perl regular expression for c library, how would I learn if i just did that :-)
</STRONG>

I completely understand. It's things like this that help us to learn tricks and techniques to improve our abilities....

debiandude
09-09-2001, 12:35 AM
Wow, absolutely, I would be honored for you to help me with this. I guess we could set up something with SourceForge; I haven't done anything with them yet, but they have so many projects there that it can't be that difficult.

Anyway, I guess we should start by figuring out what we want to start with. I was going to do \w, \d, \s and + next. Here is the substitute code so far. I am also going to need to get rid of the call to split inside the substitute function. :-)
/* Find the first occurrence in hay of the n-byte sequence at needle (n > 0,
 * no '\0' inside the sequence). Returns NULL when there is none. */
static const char *find_span(const char *hay, const char *needle, size_t n) {

    for (; (hay = strchr(hay, needle[0])) != NULL; hay++) {
        if (strncmp(hay, needle, n) == 0) {
            return hay;
        }
    }
    return NULL;

}

/*
 * subsitute - minimal literal version of Perl's s/from/with/g.
 *
 * src:     input string; never modified.
 * pattern: "/from/with/" (the leading and trailing '/' are optional).
 *          Both parts are literal text; an empty "with" deletes matches.
 *
 * Returns a newly malloc'ed string with every occurrence of "from"
 * replaced by "with"; the caller must free() it. On failure (NULL
 * argument, pattern without a "from" part or without a separator, size
 * overflow, or out of memory) src itself is returned, so callers should
 * free the result only when it differs from src.
 *
 * The pattern is parsed in place: no copy of it is made and split() is
 * not involved.
 */
char *subsitute(char *src, char *pattern) {

    const char *from, *from_end, *with, *with_end, *scan, *hit;
    size_t from_len, with_len, src_len, out_len, count = 0;
    char *replaced, *out;

    if (src == NULL || pattern == NULL) {
        return src;
    }

    from = (*pattern == '/') ? pattern + 1 : pattern;
    from_end = strchr(from, '/');
    if (from_end == NULL || from_end == from) {
        return src;                     /* no separator, or empty "from" */
    }
    from_len = (size_t)(from_end - from);

    with = from_end + 1;
    with_end = strchr(with, '/');
    with_len = (with_end != NULL) ? (size_t)(with_end - with) : strlen(with);

    /* Pass 1: count matches so the output can be sized exactly. */
    for (scan = src; (hit = find_span(scan, from, from_len)) != NULL; scan = hit + from_len) {
        count++;
    }

    src_len = strlen(src);
    if (with_len >= from_len) {
        size_t growth = with_len - from_len;
        if (growth != 0 && count > ((size_t)-1 - src_len - 1) / growth) {
            return src;                 /* result size would overflow size_t */
        }
        out_len = src_len + count * growth;
    } else {
        out_len = src_len - count * (from_len - with_len);
    }

    replaced = malloc(out_len + 1);
    if (replaced == NULL) {
        return src;
    }

    /* Pass 2: copy the text between matches, then the replacement. */
    out = replaced;
    for (scan = src; (hit = find_span(scan, from, from_len)) != NULL; scan = hit + from_len) {
        size_t gap = (size_t)(hit - scan);
        memcpy(out, scan, gap);
        out += gap;
        memcpy(out, with, with_len);
        out += with_len;
    }
    strcpy(out, scan);                  /* tail after the last match */

    return replaced;

}

debiandude
09-09-2001, 01:56 PM
Okay, last night I sat for a very long time and decided what I was going to need to do.

The first step would be to create a string parser that accounts for all the different possibilities in the s/// function.

We also need to take precedence into account, which goes: parentheses, multipliers, sequences and anchors, and last, alternation.

So we create the string parser, and then we have to create each of the individual subroutines that account for the operators. Each function needs a pointer to the instance of the operator in the array, and for some of the operators, namely parentheses, we also need a find-the-end parser, which will then process the contents of the parentheses (probably just a loop).

I applied for a project on SourceForge called cregex and I am currently waiting for approval. If anyone else is interested, post here, and maybe we could get some stuff started. Thanks for your time!

debiandude
09-09-2001, 03:16 PM
Okay, here is a sample expression parser, which doesn't really do anything; I just want to check whether I am on the right track:

#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Token categories stored in tok_type by get_token(); tok_type is 0 when
 * no token was recognised. PARENTHESES is also passed to serror() as an
 * error index by eval_parentheses(). */
#define PARENTHESES 1
#define MULTIPLIER 2
#define ANCHORING 3
#define ALTERNATION 4
#define STRING 5

/* Input cursor: advanced by get_token(), rewound by putback(). Declared
 * extern here, so its definition has to be supplied elsewhere. */
extern char *expression;
/* Text of the current token, filled in by get_token(). */
char token[80];
/* Category of the current token (one of the constants above, or 0). */
char tok_type;

/*
 * eval_expression - entry point of the expression parser.
 *
 * Reads the first token; if there is none, reports error 2 ("No Expression
 * Present"). Otherwise parses via process_expression() and reports error 0
 * ("Syntax Error") if a token is still pending afterwards.
 */
void eval_expression(void) {

    get_token();
    if (!*token) {
        serror(2);
        return;
    }

    /* process_expression() takes no arguments; the former `answer`
     * argument was never declared. */
    process_expression();
    if (*token) {
        serror(0);
    }

}

/*
 * process_expression - handle an optional "STRING /" prefix, then parse an
 * alternation.
 *
 * When the current token is a STRING, one token of look-ahead is read. If
 * it is not '/', the look-ahead is pushed back and the saved token is
 * restored; otherwise the text after the '/' is parsed and the function
 * returns.
 */
void process_expression(void) {

    char ttok_type;
    char tmp_token[80];

    if (tok_type == STRING) {
        /* Save the current token so it can be restored after look-ahead. */
        strcpy(tmp_token, token);
        ttok_type = tok_type;

        get_token();
        if (*token != '/') {
            /* putback() must run while token still holds the look-ahead. */
            putback();
            strcpy(token, tmp_token);
            tok_type = ttok_type;
        } else {
            get_token();
            eval_alternation();
            /* NOTE(review): vars, slot and parsing are not declared anywhere
             * in the visible code - confirm the intended storage. */
            vars[slot] = *parsing;
            return;
        }
    }

    eval_alternation();

}

/*
 * eval_alternation - parse one or more sequences separated by '|'.
 * The action to take for each alternative is still a placeholder.
 */
void eval_alternation(void) {

    eval_seq_anch();
    while (*token == '|') {
        get_token();
        eval_seq_anch();    /* takes no arguments */
        /* do what needs to be done ;-) */
    }
}

/*
 * eval_seq_anch - parse a multiplier-level operand followed by any number
 * of anchor operators ('^' or '$'), each followed by another operand.
 * The per-operator actions are still placeholders.
 */
void eval_seq_anch(void) {

    char op;

    eval_multipliers();
    while ((op = *token) == '^' || op == '$') {
        get_token();
        eval_multipliers();
        switch (op) {
        /* Case for each of the operators */
        default:
            break;
        }
    }
}

/*
 * eval_multipliers - parse a parenthesis-level operand optionally followed
 * by one multiplier ('?', '+' or '*') and a further operand.
 * The per-operator actions are still placeholders.
 */
void eval_multipliers(void) {

    char op;

    eval_parentheses();
    if ((op = *token) == '?' || op == '+' || op == '*') {
        get_token();
        eval_parentheses();
        switch (op) {
        /* Case for each of the operators */
        default:
            break;
        }
    }
}

void eval_parentheses() {

if((*token == '(') {
get_token();
if(*token != ')')
serror(PARENTHESES);
get_token();
} else atom(answer);
}

/*
 * atom - consume a STRING token; any other token type is reported as
 * error 0 ("Syntax Error").
 */
void atom(void) {

    switch (tok_type) {
    case STRING:
        /* NOTE(review): blargh and find_string are not declared anywhere
         * in the visible code - confirm where the match result should go. */
        *blargh = find_string(token);
        get_token();
        return;
    default:
        serror(0);
    }
}

/*
 * putback - un-read the current token by moving the input cursor back by
 * one position for every character held in token.
 */
void putback(void) {

    expression -= strlen(token);

}

/*
 * serror - print the parser error message for the given code to stderr.
 *
 * error: 0 = "Syntax Error", 1 = "Unbalanced Parentheses",
 *        2 = "No Expression Present". Any other value is reported as a
 *        syntax error instead of indexing outside the table.
 */
void serror(int error) {

    static const char *err[] = {
        "Syntax Error",
        "Unbalanced Parentheses",
        "No Expression Present"
    };
    const int count = (int)(sizeof err / sizeof err[0]);

    if (error < 0 || error >= count) {
        error = 0;
    }

    fprintf(stderr, "%s\n", err[error]);

}

void get_token(void) {

register char *tmp;

tok_type = 0;
tmp = token;
*tmp = '\0';

if(!*expression) return;

while(isspace(*expression)) ++expression;

if(strchr("()", *expression)) {
tok_type = PARENTHESES;
*tmp++ = *expression++;
} else if(strchr("?+*", *expression)) {
tok_type = MULTIPLIER;
*tmp++ = *expression++;
} else if(strchr("^$\", *expression) {
tok_type = ANCHORING;
*tmp++ = *expression++;
} else if(strchr("|", *expression) {
tok_type = ALTERNATION;
*tmp++ = *expression++;
} else if(isalpha(*expression) {
while(!isdelim(*expression)) *tmp++ = *expression++;
tok_type = STRING;
}

*tmp = '\0';

}

/*
 * isdelim - return 1 if c ends a STRING token, 0 otherwise.
 *
 * Delimiters are space, the operator characters ( ) ? : + * ^ $ \ | . and
 * the terminating '\0' (strchr matches it), so scanning stops at the end
 * of the input.
 * NOTE(review): the original literal contained "\|", which is not a valid
 * escape; this assumes both backslash and '|' were intended - confirm.
 */
int isdelim(char c) {

    return strchr(" ()?:+*^$\\|.", c) != NULL;

}


[ 09 September 2001: Message edited by: debiandude ]