patterncMinor
Extracting information from a URI
Viewed 0 times
urifromextractinginformation
Problem
I'm completely new to C and my first project is to create a simple cURL-like HTTP request client... but first, I need to create a function that can parse a URL. I've created this function, and it seems to work quite well, but since I'm so new to the complexities of C, can someone check for any errors or to see if I can improve clock efficiency?
Example usage can be found here.
```
#include
#include
#include
void filterURLstring(char *weburl, char protocol, char username, char password, char host, char port, char path);
int main() {
char *text = "http://mhdfffffjddj:abc123@192.168.0.2:8888/servlet/rece/";
char protocol, username, password, host, port, path;
filterURLstring(text, &protocol, &username, &password, &host, &port, &path);
printf("Protocol:\"%s\"\n", protocol);
printf("Username:\"%s\"\n", username);
printf("Password:\"%s\"\n", password);
printf("Host:\"%s\"\n", host);
printf("Port:\"%s\"\n", port);
printf("Path:\"%s\"\n", path);
free(protocol);
free(username);
free(password);
free(host);
free(port);
free(path);
return 0;
}
void filterURLstring(char *weburl, char protocol, char username, char password, char host, char port, char path) {
char postprotocol, postuserinfo, postusername, login, posthost, posthostname, hostloc, postport;
int isPath;
if ((postprotocol = strstr(weburl, "://")) == NULL) {
printf("Input URL is invalid. Exiting...\n");
exit(1);
}
protocol = malloc(sizeof(char) (postprotocol - weburl + 1));
strncpy(*protocol, weburl, postprotocol - weburl);
postprotocol = (postprotocol + (*(postprotocol+3)=='/'?4:3));
if ((postuserinfo = strstr(postprotocol, "@")) != NULL) {
if ((postusername = strchr(postprotocol, ':')) != NULL) {
password = malloc(sizeof(char) (postuserinfo - postusername + 1));
strncpy(*password, postusername + 1, (postuserinfo - postusernam
Example usage can be found here.
```
#include
#include
#include
void filterURLstring(char *weburl, char protocol, char username, char password, char host, char port, char path);
int main() {
char *text = "http://mhdfffffjddj:abc123@192.168.0.2:8888/servlet/rece/";
char protocol, username, password, host, port, path;
filterURLstring(text, &protocol, &username, &password, &host, &port, &path);
printf("Protocol:\"%s\"\n", protocol);
printf("Username:\"%s\"\n", username);
printf("Password:\"%s\"\n", password);
printf("Host:\"%s\"\n", host);
printf("Port:\"%s\"\n", port);
printf("Path:\"%s\"\n", path);
free(protocol);
free(username);
free(password);
free(host);
free(port);
free(path);
return 0;
}
void filterURLstring(char *weburl, char protocol, char username, char password, char host, char port, char path) {
char postprotocol, postuserinfo, postusername, login, posthost, posthostname, hostloc, postport;
int isPath;
if ((postprotocol = strstr(weburl, "://")) == NULL) {
printf("Input URL is invalid. Exiting...\n");
exit(1);
}
protocol = malloc(sizeof(char) (postprotocol - weburl + 1));
strncpy(*protocol, weburl, postprotocol - weburl);
postprotocol = (postprotocol + (*(postprotocol+3)=='/'?4:3));
if ((postuserinfo = strstr(postprotocol, "@")) != NULL) {
if ((postusername = strchr(postprotocol, ':')) != NULL) {
password = malloc(sizeof(char) (postuserinfo - postusername + 1));
strncpy(*password, postusername + 1, (postuserinfo - postusernam
Solution
Think in terms of an Abstract Data Type (ADT)
@x79 is right about using a
In fact, it makes sense to think of this in terms of an abstract data type,
as defined in Code Complete:
An abstract data type is a collection of data and operations that work on that data.
The collection of data is the URL info, wrapped in a struct, and the operations can be:
This collection of methods together form an abstract data type.
They encapsulate the operations you need,
while hiding the implementation details.
Users of a
they just need to know what the operations are in the ADT (create / delete / isValid / print).
These methods closely collaborate with each other,
emulating class-like behavior in object oriented languages.
Armed with this ADT,
your
The purpose of the
It's not good when a function exits in the middle of the program.
Controlling the program flow is not the job of a function parsing a URL.
The parser should just parse, and let the caller decide what to do with an invalid input.
Other improvements
Instead of hardcoding the URL used by the program,
it would be much more useful to take it as a command line argument, for example:
No need to do
the compiler automatically adds that.
This line is too tightly packed:
Use spaces around operators to make it more readable, like this:
Suggested implementation
Putting some of the above points together,
a Url Info abstract data type and a main method using it might look like this:
```
#include
#include
#include
typedef struct UrlInfo {
char * protocol;
char * username;
char * password;
char * host;
char * port;
char * path;
} UrlInfo;
UrlInfo createUrlInfo(const char );
void deleteUrlInfo(UrlInfo *);
int isValidUrlInfo(UrlInfo *);
void printUrlInfo(UrlInfo *);
int main(int argc, char ** argv) {
int i;
UrlInfo * urlInfo;
for (i = 1; i protocol = NULL;
urlInfo->username = NULL;
urlInfo->password = NULL;
urlInfo->host = NULL;
urlInfo->port = NULL;
urlInfo->path = NULL;
int isPath;
if ((postprotocol = strstr(weburl, "://")) == NULL) {
return urlInfo;
}
urlInfo->protocol = malloc(sizeof(char) * (postprotocol - weburl + 1));
strncpy(urlInfo->protocol, weburl, postprotocol - weburl);
postprotocol = (postprotocol + (*(postprotocol + 3) == '/' ? 4 : 3));
if ((postuserinfo = strstr(postprotocol, "@")) != NULL) {
if ((postusername = strchr(postprotocol, ':')) != NULL) {
urlInfo->password = malloc(sizeof(char) * (postuserinfo - postusername + 1));
strncpy(urlInfo->password, postusername + 1, (postuserinfo - postusername - 1));
} else {
urlInfo->password = NULL;
}
int userlen = (postusername?postusername:postuserinfo) - postprotocol;
urlInfo->username = malloc(sizeof(char) * userlen + 1);
strncpy(urlInfo->username, postprotocol, userlen);
}
hostloc = (postuserinfo?postuserinfo + 1:postprotocol);
if ((posthost = strchr(hostloc, '/')) == NULL) {
posthost = hostloc + strlen(hostloc);
isPath = 1;
}
if ((posthostname = strchr(hostloc, ':')) == NULL) {
posthostname = posthost;
}
urlInfo->host = malloc(sizeof(char) * (posthostname - hostloc + 1));
strncpy(urlInfo->host, hostloc, (posthostname - hostloc));
if (posthostname != posthost) {
posthostname++;
urlInfo->port = malloc(sizeof(char) * (posthost - posthostname + 1));
strncpy(urlInfo->port, posthostname, (posthost - posthostname));
} else {
urlInfo->port = malloc(sizeof(char) * 2 + 1);
strncpy(urlInfo->port, "80", 2);
}
if (isPath) {
urlInfo->path = malloc(sizeof(char) * strlen(posthost));
strncpy(urlInfo->path, posthost, strlen(posthost));
} else {
urlInfo->path = calloc(1, 1);
}
return urlInfo;
}
void deleteUrlInfo(UrlIn
@x79 is right about using a
struct for this.In fact, it makes sense to think of this in terms of an abstract data type,
as defined in Code Complete:
An abstract data type is a collection of data and operations that work on that data.
The collection of data is the URL info, wrapped in a struct, and the operations can be:
UrlInfo * createUrlInfo(const char *);
deleteUrlInfo(UrlInfo *);
isValidUrlInfo(UrlInfo *);
printUrlInfo(UrlInfo *);This collection of methods together form an abstract data type.
They encapsulate the operations you need,
while hiding the implementation details.
Users of a
UrlInfo don't need to know how the operations work,they just need to know what the operations are in the ADT (create / delete / isValid / print).
These methods closely collaborate with each other,
emulating class-like behavior in object oriented languages.
Armed with this ADT,
your
main function can be rewritten in a more functional, natural way:int main() {
const char * text = "http://mhdfffffjddj:abc123@192.168.0.2:8888/servlet/rece/";
UrlInfo * urlInfo = createUrlInfo(text);
if (isValidUrlInfo(urlInfo)) {
printUrlInfo(urlInfo);
}
deleteUrlInfo(urlInfo);
}The purpose of the
isValidUrlInfo is to move the exit statement out of the current implementation.It's not good when a function exits in the middle of the program.
Controlling the program flow is not the job of a function parsing a URL.
The parser should just parse, and let the caller decide what to do with an invalid input.
Other improvements
port make would make sense to store in an int, not a char*Instead of hardcoding the URL used by the program,
it would be much more useful to take it as a command line argument, for example:
int main(int argc, char ** argv) {
const char * text = argv[1];
UrlInfo * urlInfo = createUrlInfo(text);
if (isValidUrlInfo(urlInfo)) {
printUrlInfo(urlInfo);
}
deleteUrlInfo(urlInfo);
}No need to do
return 0 at the end of main,the compiler automatically adds that.
This line is too tightly packed:
int userlen = (postusername?postusername:postuserinfo) - postprotocol;Use spaces around operators to make it more readable, like this:
int userlen = (postusername ? postusername : postuserinfo) - postprotocol;Suggested implementation
Putting some of the above points together,
a Url Info abstract data type and a main method using it might look like this:
```
#include
#include
#include
typedef struct UrlInfo {
char * protocol;
char * username;
char * password;
char * host;
char * port;
char * path;
} UrlInfo;
UrlInfo createUrlInfo(const char );
void deleteUrlInfo(UrlInfo *);
int isValidUrlInfo(UrlInfo *);
void printUrlInfo(UrlInfo *);
int main(int argc, char ** argv) {
int i;
UrlInfo * urlInfo;
for (i = 1; i protocol = NULL;
urlInfo->username = NULL;
urlInfo->password = NULL;
urlInfo->host = NULL;
urlInfo->port = NULL;
urlInfo->path = NULL;
int isPath;
if ((postprotocol = strstr(weburl, "://")) == NULL) {
return urlInfo;
}
urlInfo->protocol = malloc(sizeof(char) * (postprotocol - weburl + 1));
strncpy(urlInfo->protocol, weburl, postprotocol - weburl);
postprotocol = (postprotocol + (*(postprotocol + 3) == '/' ? 4 : 3));
if ((postuserinfo = strstr(postprotocol, "@")) != NULL) {
if ((postusername = strchr(postprotocol, ':')) != NULL) {
urlInfo->password = malloc(sizeof(char) * (postuserinfo - postusername + 1));
strncpy(urlInfo->password, postusername + 1, (postuserinfo - postusername - 1));
} else {
urlInfo->password = NULL;
}
int userlen = (postusername?postusername:postuserinfo) - postprotocol;
urlInfo->username = malloc(sizeof(char) * userlen + 1);
strncpy(urlInfo->username, postprotocol, userlen);
}
hostloc = (postuserinfo?postuserinfo + 1:postprotocol);
if ((posthost = strchr(hostloc, '/')) == NULL) {
posthost = hostloc + strlen(hostloc);
isPath = 1;
}
if ((posthostname = strchr(hostloc, ':')) == NULL) {
posthostname = posthost;
}
urlInfo->host = malloc(sizeof(char) * (posthostname - hostloc + 1));
strncpy(urlInfo->host, hostloc, (posthostname - hostloc));
if (posthostname != posthost) {
posthostname++;
urlInfo->port = malloc(sizeof(char) * (posthost - posthostname + 1));
strncpy(urlInfo->port, posthostname, (posthost - posthostname));
} else {
urlInfo->port = malloc(sizeof(char) * 2 + 1);
strncpy(urlInfo->port, "80", 2);
}
if (isPath) {
urlInfo->path = malloc(sizeof(char) * strlen(posthost));
strncpy(urlInfo->path, posthost, strlen(posthost));
} else {
urlInfo->path = calloc(1, 1);
}
return urlInfo;
}
void deleteUrlInfo(UrlIn
Code Snippets
UrlInfo * createUrlInfo(const char *);
deleteUrlInfo(UrlInfo *);
isValidUrlInfo(UrlInfo *);
printUrlInfo(UrlInfo *);int main() {
const char * text = "http://mhdfffffjddj:abc123@192.168.0.2:8888/servlet/rece/";
UrlInfo * urlInfo = createUrlInfo(text);
if (isValidUrlInfo(urlInfo)) {
printUrlInfo(urlInfo);
}
deleteUrlInfo(urlInfo);
}int main(int argc, char ** argv) {
const char * text = argv[1];
UrlInfo * urlInfo = createUrlInfo(text);
if (isValidUrlInfo(urlInfo)) {
printUrlInfo(urlInfo);
}
deleteUrlInfo(urlInfo);
}int userlen = (postusername?postusername:postuserinfo) - postprotocol;int userlen = (postusername ? postusername : postuserinfo) - postprotocol;Context
StackExchange Code Review Q#83822, answer score: 5
Revisions (0)
No revisions yet.