"Fossies" - the Fresh Open Source Software Archive 
Member "srg-1.3.6/src/parseURL.cc" (5 Aug 2009, 10255 Bytes) of package /linux/privat/old/srg-1.3.6.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
1 /*
2 SRG - Squid Report Generator
3 Copyright 2005 University of Waikato
4
5 This file is part of SRG.
6
7 SRG is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 SRG is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with SRG; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 */
22
23 #include "srg.h"
24
/* States of the URL parsing state machine used by parseURL().
 * The numeric values are kept identical to the previous #define constants. */
enum {
    STATE_INIT     = 0, /* Reading the first token; no ':' or '/' seen yet */
    STATE_COLON    = 1, /* The first ':' has just been consumed */
    STATE_PROTOCOL = 2, /* Saw ":/", a second '/' must follow */
    STATE_PASSPORT = 3, /* After a ':'; the text is a password or a port */
    STATE_USERSITE = 4, /* After "://"; the text is a user or a site */
    STATE_SITE     = 5, /* After '@'; reading the site */
    STATE_PORT     = 6, /* After the site's ':'; reading the port */
    STATE_END      = 7  /* Location captured; nothing left to parse */
};
33
34 /* Parses the specified URL and fills the structure with the different parts
35 * return 0 on success or non-zero otherwise */
36 int parseURL(const char*URL, url_request *req) {
37
38 char *start = strdup(URL);
39 char *tmp = start;
40 char *t1 = NULL;
41 char *t2 = NULL;
42 unsigned int state = STATE_INIT;
43
44 /* Initialise the return structure */
45 req->protocol = NULL;
46 req->user = NULL;
47 req->password = NULL;
48 req->site = NULL;
49 req->port = NULL;
50 req->location = NULL;
51
52 while (*tmp != '\0') {
53 switch (state) {
54 case STATE_INIT:
55 if (*tmp == ':') {
56 /* Cannot Start with : */
57 if (!t1) {
58 free(start);
59 return -1;
60 }
61 *tmp = '\0';
62 t1 = start;
63 state = STATE_COLON;
64 } else if (*tmp == '/') {
65 /* Cannot Start with / */
66 if (!t1) {
67 free(start);
68 return -1;
69 }
70 *tmp = '\0';
71 req->site = strdup(start);
72 t1 = NULL;
73 /* Get the location also */
74 *tmp = '/';
75 req->location = strdup(tmp);
76 state = STATE_END;
77 } else {
78 /* Temporary pointer */
79 t1 = tmp;
80 }
81 break;
82 case STATE_COLON:
83 if (*tmp == '/') {
84 state = STATE_PROTOCOL;
85 } else if(*tmp == '@') {
86 /* user:@ (empty password!) */
87 *tmp = '\0';
88 req->user = strdup(start);
89 req->password = strdup("\0");
90 t1 = NULL;
91 state = STATE_SITE;
92 } else {
93 /* No Protocol, could be password or port */
94 t2 = tmp;
95 state = STATE_PASSPORT;
96 }
97 break;
98 case STATE_PROTOCOL:
99 if (*tmp == '/') {
100 /* Valid protocol found */
101 req->protocol = strdup(start);
102 t1 = NULL;
103 /* Could not have a user or a site */
104 state = STATE_USERSITE;
105 } else {
106 /* :/ is not a valid string in a URL */
107 free(start);
108 return -1;
109 }
110 break;
111 case STATE_USERSITE:
112 if (*tmp == ':') {
113 /* Cannot Start with : */
114 if (!t1) {
115 free(start);
116 return -1;
117 }
118 *tmp = '\0';
119 state = STATE_PASSPORT;
120 } else if (*tmp == '/') {
121 if (!t1) {
122 /* /// is not valid in a URL */
123 free(start);
124 return -1;
125 }
126 /* No User / Pass, No Port */
127 *tmp = '\0';
128 req->site = strdup(t1);
129 /* Get the location also */
130 *tmp = '/';
131 req->location = strdup(tmp);
132 state = STATE_END;
133 }
134 /* Set starting pointer */
135 if (!t1)
136 t1 = tmp;
137 break;
138 case STATE_PASSPORT:
139 if (*tmp == '@') {
140 /* We have user/pass */
141 *tmp ='\0';
142 req->user = strdup(t1);
143 if (t2)
144 req->password = strdup(t2);
145 else
146 req->password = strdup("\0");
147 t1 = NULL;
148 t2 = NULL;
149 state = STATE_SITE;
150 } else if (*tmp == '/') {
151 /* We have site / port */
152 if (!t2) {
153 /* :/ is not valid in a URL */
154 free(start);
155 return -1;
156 }
157 *tmp = '\0';
158 req->site = strdup(t1);
159 req->port = strdup(t2);
160 t1 = NULL;
161 t2 = NULL;
162 /* Get the location also */
163 *tmp = '/';
164 req->location = strdup(tmp);
165 state = STATE_END;
166 }
167 /* Set starting pointer */
168 if (!t2)
169 t2 = tmp;
170 break;
171 case STATE_SITE:
172 if (*tmp == ':') {
173 /* Site / Port found */
174 if (!t1) {
175 /* Cannot have 0 length site */
176 free(start);
177 return -1;
178 }
179 *tmp = '\0';
180 req->site = strdup(t1);
181 t1 = NULL;
182 state = STATE_PORT;
183 } else if (*tmp == '/') {
184 /* Site / Location found */
185 if (!t1) {
186 /* Cannot have 0 length site */
187 free(start);
188 return -1;
189 }
190 *tmp = '\0';
191 req->site = strdup(t1);
192 t1 = NULL;
193 /* Get the location also */
194 *tmp = '/';
195 req->location = strdup(tmp);
196 state = STATE_END;
197 } else {
198 /* Set starting pointer */
199 if (!t1)
200 t1 = tmp;
201 }
202 break;
203 case STATE_PORT:
204 if (*tmp == '/') {
205 /* Port / Location found */
206 if (!t1) {
207 /* Cannot have 0 length port */
208 free(start);
209 return -1;
210 }
211 *tmp = '\0';
212 req->port = strdup(t1);
213 t1 = NULL;
214 /* And get the location also */
215 *tmp = '/';
216 req->location = strdup(tmp);
217 state = STATE_END;
218 }
219 /* Set starting pointer */
220 if (!t1)
221 t1 = tmp;
222 break;
223 case STATE_END:
224 break;
225 break;
226 }
227 /* Move to next character */
228 tmp++;
229 }
230
231 /* Ending cases */
232 switch (state) {
233 case STATE_INIT:
234 /* No : found!, assume form of 'www.google.com',
235 * assign site only
236 */
237 req->site = strdup(start);
238 break;
239 case STATE_COLON:
240 /* INVALID: Cannot end URL with a : */
241 free(start);
242 return -1;
243 break;
244 case STATE_PROTOCOL:
245 /* INVALID: Cannot end URL with :/ */
246 free(start);
247 return -1;
248 break;
249 case STATE_PASSPORT:
250 /* We have a URL of the form 'www.google.com:80' */
251 req->site = strdup(t1);
252 req->port = strdup(t2);
253 break;
254 case STATE_USERSITE:
255 case STATE_SITE:
256 /* Assume that we have found a site */
257 if (!t1) {
258 /* Must be > 0 length */
259 free(start);
260 return -1;
261 }
262 req->site = strdup(t1);
263 break;
264 case STATE_PORT:
265 /* Got port but no location, site is already set */
266 if (!t1) {
267 /* Cannot have 0 length port */
268 free(start);
269 return -1;
270 }
271 req->port = strdup(t1);
272
273 break;
274 case STATE_END:
275 /* All is Well :) */
276 break;
277 }
278
279 /* Always return a valid string for the location */
280 if (!req->location)
281 req->location = strdup("\0");
282
283 free(start);
284
285 return 0;
286 }
287
288 char *asprintURL(url_request *req) {
289
290 char *buffer=NULL;
291
292 /* Check that the parsed string matches the original */
293 if (req->protocol != NULL) {
294 if (req->user != NULL && req->password != NULL) {
295 if (req->port != NULL) {
296 if (req->location != NULL) {
297 asprintf(&buffer,
298 "%s://%s:%s@%s:%s%s",
299 req->protocol, req->user,
300 req->password, req->site,
301 req->port, req->location);
302 } else {
303 asprintf(&buffer,
304 "%s://%s:%s@%s:%s",
305 req->protocol, req->user,
306 req->password, req->site,
307 req->port);
308 }
309 } else {
310 if (req->location != NULL) {
311 asprintf(&buffer,
312 "%s://%s:%s@%s%s",
313 req->protocol, req->user,
314 req->password, req->site,
315 req->location);
316 } else {
317 asprintf(&buffer,
318 "%s://%s:%s@%s",
319 req->protocol, req->user,
320 req->password, req->site);
321 }
322 }
323 } else {
324 if (req->port != NULL) {
325 if (req->location != NULL) {
326 asprintf(&buffer,
327 "%s://%s:%s%s",
328 req->protocol, req->site,
329 req->port, req->location);
330 } else {
331 asprintf(&buffer,
332 "%s://%s:%s",
333 req->protocol, req->site,
334 req->port);
335 }
336 } else {
337 if (req->location != NULL) {
338 asprintf(&buffer,
339 "%s://%s%s",
340 req->protocol, req->site,
341 req->location);
342 } else {
343 asprintf(&buffer,
344 "%s://%s", req->protocol,
345 req->site);
346 }
347 }
348 }
349 } else {
350 if (req->user != NULL && req->password != NULL) {
351 if (req->port != NULL) {
352 if (req->location != NULL) {
353 asprintf(&buffer,
354 "%s:%s@%s:%s%s", req->user,
355 req->password, req->site,
356 req->port, req->location);
357 } else {
358 asprintf(&buffer,
359 "%s:%s@%s:%s", req->user,
360 req->password, req->site,
361 req->port);
362 }
363 } else {
364 if (req->location != NULL) {
365 asprintf(&buffer,
366 "%s:%s@%s%s", req->user,
367 req->password, req->site,
368 req->location);
369 } else {
370 asprintf(&buffer,
371 "%s:%s@%s", req->user,
372 req->password, req->site);
373 }
374 }
375 } else {
376 if (req->port != NULL) {
377 if (req->location != NULL) {
378 asprintf(&buffer,
379 "%s:%s%s", req->site,
380 req->port, req->location);
381 } else {
382 asprintf(&buffer,
383 "%s:%s", req->site,
384 req->port);
385 }
386 } else {
387 if (req->location != NULL) {
388 asprintf(&buffer,
389 "%s%s", req->site,
390 req->location);
391 } else {
392 asprintf(&buffer,
393 "%s", req->site);
394 }
395 }
396 }
397 }
398
399 return buffer;
400
401 }
402
403 void freeURL(url_request *req) {
404
405 /* Free any allocated strings */
406 if (req->protocol)
407 free(req->protocol);
408 if (req->user)
409 free(req->user);
410 if (req->password)
411 free(req->password);
412 if (req->site)
413 free(req->site);
414 if (req->port)
415 free(req->port);
416 if (req->location)
417 free(req->location);
418
419 }
420
421 #ifdef TEST
422 #include <assert.h>
423 void testURL(const char *URL, bool is_invalid);
424
425 int main(int argc, char **argv) {
426
427 testURL("www.google.com", false);
428 testURL("www.google.com:80", false);
429 testURL("http://www.google.com/", false);
430 testURL("http://www.google.com/index.html", false);
431 testURL("http://www.google.com:80/", false);
432 testURL("http://www.google.com:80/index.html", false);
433 testURL("www.google.com:80/index.html", false);
434 testURL("www.google.com/index.html", false);
435 testURL("www.google.com/", false);
436 testURL("www.google.com:80/", false);
437 testURL("matt:@www.google.com/index.html", false);
438 testURL("matt:matt@www.google.com:80/", false);
439 testURL(":matt@www.google.com:80/", true);
440 testURL("ftp://www.google.com/", false);
441
442 exit(0);
443 }
444
445 /* Tests that the specified URL is correctly parsed */
446 void testURL(const char *URL, bool is_invalid) {
447
448 url_request result;
449 char *buffer=NULL;
450
451 if (parseURL(URL, &result)!=0) {
452 /* Invalid URL, cannot parse */
453 assert(is_invalid && true);
454 return;
455 }
456
457 /* Check that the parsed string matches the original */
458 buffer = asprintURL(&result);
459
460 int rv = strcasecmp(buffer, URL);
461 assert(rv==0);
462
463 if (buffer)
464 free(buffer);
465 freeURL(&result);
466
467 return;
468
469 }
470
471 #endif
472