extremely small and simple HTTP GET/HEAD-only web server for static content
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

669 lines
16 KiB

  1. /* See LICENSE file for copyright and license details. */
  2. #include <arpa/inet.h>
  3. #include <ctype.h>
  4. #include <errno.h>
  5. #include <limits.h>
  6. #include <netinet/in.h>
  7. #include <regex.h>
  8. #include <stddef.h>
  9. #include <stdint.h>
  10. #include <stdio.h>
  11. #include <string.h>
  12. #include <strings.h>
  13. #include <sys/socket.h>
  14. #include <sys/stat.h>
  15. #include <sys/types.h>
  16. #include <time.h>
  17. #include <unistd.h>
  18. #include "config.h"
  19. #include "http.h"
  20. #include "resp.h"
  21. #include "util.h"
  22. const char *req_field_str[] = {
  23. [REQ_HOST] = "Host",
  24. [REQ_RANGE] = "Range",
  25. [REQ_MOD] = "If-Modified-Since",
  26. };
  27. const char *req_method_str[] = {
  28. [M_GET] = "GET",
  29. [M_HEAD] = "HEAD",
  30. };
  31. const char *status_str[] = {
  32. [S_OK] = "OK",
  33. [S_PARTIAL_CONTENT] = "Partial Content",
  34. [S_MOVED_PERMANENTLY] = "Moved Permanently",
  35. [S_NOT_MODIFIED] = "Not Modified",
  36. [S_BAD_REQUEST] = "Bad Request",
  37. [S_FORBIDDEN] = "Forbidden",
  38. [S_NOT_FOUND] = "Not Found",
  39. [S_METHOD_NOT_ALLOWED] = "Method Not Allowed",
  40. [S_REQUEST_TIMEOUT] = "Request Time-out",
  41. [S_RANGE_NOT_SATISFIABLE] = "Range Not Satisfiable",
  42. [S_REQUEST_TOO_LARGE] = "Request Header Fields Too Large",
  43. [S_INTERNAL_SERVER_ERROR] = "Internal Server Error",
  44. [S_VERSION_NOT_SUPPORTED] = "HTTP Version not supported",
  45. };
  46. enum status
  47. http_send_status(int fd, enum status s)
  48. {
  49. static char t[TIMESTAMP_LEN];
  50. if (dprintf(fd,
  51. "HTTP/1.1 %d %s\r\n"
  52. "Date: %s\r\n"
  53. "Connection: close\r\n"
  54. "%s"
  55. "Content-Type: text/html; charset=utf-8\r\n"
  56. "\r\n"
  57. "<!DOCTYPE html>\n<html>\n\t<head>\n"
  58. "\t\t<title>%d %s</title>\n\t</head>\n\t<body>\n"
  59. "\t\t<h1>%d %s</h1>\n\t</body>\n</html>\n",
  60. s, status_str[s], timestamp(time(NULL), t),
  61. (s == S_METHOD_NOT_ALLOWED) ? "Allow: HEAD, GET\r\n" : "",
  62. s, status_str[s], s, status_str[s]) < 0) {
  63. return S_REQUEST_TIMEOUT;
  64. }
  65. return s;
  66. }
  /* Value (0-15) of the hex digit `c`, or -1 when `c` is not a hex digit. */
  static int
  hexval(unsigned char c)
  {
      if (c >= '0' && c <= '9') {
          return c - '0';
      }
      if (c >= 'a' && c <= 'f') {
          return c - 'a' + 10;
      }
      if (c >= 'A' && c <= 'F') {
          return c - 'A' + 10;
      }
      return -1;
  }

  /*
   * Percent-decode the NUL-terminated string `src` into `dest`.
   *
   * Only a '%' followed by exactly two hex digits is treated as an escape;
   * any other '%' is copied through unchanged. The previous sscanf()-based
   * parser also accepted a single digit, leading whitespace or a sign while
   * always skipping two bytes, which could step past the terminator.
   *
   * The output is never longer than the input, so `dest` may be the same
   * buffer as `src` (the caller in this file decodes in place).
   */
  static void
  decode(const char src[PATH_MAX], char dest[PATH_MAX])
  {
      const char *s;
      size_t i;
      int hi, lo;

      for (s = src, i = 0; *s; s++, i++) {
          /* short-circuit keeps s[2] unread when s[1] is the terminator */
          if (*s == '%' &&
              (hi = hexval((unsigned char)s[1])) >= 0 &&
              (lo = hexval((unsigned char)s[2])) >= 0) {
              dest[i] = (char)((hi << 4) | lo);
              s += 2;
          } else {
              dest[i] = *s;
          }
      }
      dest[i] = '\0';
  }
  83. int
  84. http_get_request(int fd, struct request *r)
  85. {
  86. struct in6_addr res;
  87. size_t hlen, i, mlen;
  88. ssize_t off;
  89. char h[HEADER_MAX], *p, *q;
  90. /* empty all fields */
  91. memset(r, 0, sizeof(*r));
  92. /*
  93. * receive header
  94. */
  95. for (hlen = 0; ;) {
  96. if ((off = read(fd, h + hlen, sizeof(h) - hlen)) < 0) {
  97. return http_send_status(fd, S_REQUEST_TIMEOUT);
  98. } else if (off == 0) {
  99. break;
  100. }
  101. hlen += off;
  102. if (hlen >= 4 && !memcmp(h + hlen - 4, "\r\n\r\n", 4)) {
  103. break;
  104. }
  105. if (hlen == sizeof(h)) {
  106. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  107. }
  108. }
  109. /* remove terminating empty line */
  110. if (hlen < 2) {
  111. return http_send_status(fd, S_BAD_REQUEST);
  112. }
  113. hlen -= 2;
  114. /* null-terminate the header */
  115. h[hlen] = '\0';
  116. /*
  117. * parse request line
  118. */
  119. /* METHOD */
  120. for (i = 0; i < NUM_REQ_METHODS; i++) {
  121. mlen = strlen(req_method_str[i]);
  122. if (!strncmp(req_method_str[i], h, mlen)) {
  123. r->method = i;
  124. break;
  125. }
  126. }
  127. if (i == NUM_REQ_METHODS) {
  128. return http_send_status(fd, S_METHOD_NOT_ALLOWED);
  129. }
  130. /* a single space must follow the method */
  131. if (h[mlen] != ' ') {
  132. return http_send_status(fd, S_BAD_REQUEST);
  133. }
  134. /* basis for next step */
  135. p = h + mlen + 1;
  136. /* TARGET */
  137. if (!(q = strchr(p, ' '))) {
  138. return http_send_status(fd, S_BAD_REQUEST);
  139. }
  140. *q = '\0';
  141. if (q - p + 1 > PATH_MAX) {
  142. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  143. }
  144. memcpy(r->target, p, q - p + 1);
  145. decode(r->target, r->target);
  146. /* basis for next step */
  147. p = q + 1;
  148. /* HTTP-VERSION */
  149. if (strncmp(p, "HTTP/", sizeof("HTTP/") - 1)) {
  150. return http_send_status(fd, S_BAD_REQUEST);
  151. }
  152. p += sizeof("HTTP/") - 1;
  153. if (strncmp(p, "1.0", sizeof("1.0") - 1) &&
  154. strncmp(p, "1.1", sizeof("1.1") - 1)) {
  155. return http_send_status(fd, S_VERSION_NOT_SUPPORTED);
  156. }
  157. p += sizeof("1.*") - 1;
  158. /* check terminator */
  159. if (strncmp(p, "\r\n", sizeof("\r\n") - 1)) {
  160. return http_send_status(fd, S_BAD_REQUEST);
  161. }
  162. /* basis for next step */
  163. p += sizeof("\r\n") - 1;
  164. /*
  165. * parse request-fields
  166. */
  167. /* match field type */
  168. for (; *p != '\0';) {
  169. for (i = 0; i < NUM_REQ_FIELDS; i++) {
  170. if (!strncasecmp(p, req_field_str[i],
  171. strlen(req_field_str[i]))) {
  172. break;
  173. }
  174. }
  175. if (i == NUM_REQ_FIELDS) {
  176. /* unmatched field, skip this line */
  177. if (!(q = strstr(p, "\r\n"))) {
  178. return http_send_status(fd, S_BAD_REQUEST);
  179. }
  180. p = q + (sizeof("\r\n") - 1);
  181. continue;
  182. }
  183. p += strlen(req_field_str[i]);
  184. /* a single colon must follow the field name */
  185. if (*p != ':') {
  186. return http_send_status(fd, S_BAD_REQUEST);
  187. }
  188. /* skip whitespace */
  189. for (++p; *p == ' ' || *p == '\t'; p++)
  190. ;
  191. /* extract field content */
  192. if (!(q = strstr(p, "\r\n"))) {
  193. return http_send_status(fd, S_BAD_REQUEST);
  194. }
  195. *q = '\0';
  196. if (q - p + 1 > FIELD_MAX) {
  197. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  198. }
  199. memcpy(r->field[i], p, q - p + 1);
  200. /* go to next line */
  201. p = q + (sizeof("\r\n") - 1);
  202. }
  203. /*
  204. * clean up host
  205. */
  206. p = strrchr(r->field[REQ_HOST], ':');
  207. q = strrchr(r->field[REQ_HOST], ']');
  208. /* strip port suffix but don't interfere with IPv6 bracket notation
  209. * as per RFC 2732 */
  210. if (p && (!q || p > q)) {
  211. /* port suffix must not be empty */
  212. if (*(p + 1) == '\0') {
  213. return http_send_status(fd, S_BAD_REQUEST);
  214. }
  215. *p = '\0';
  216. }
  217. /* strip the brackets from the IPv6 notation and validate the address */
  218. if (q) {
  219. /* brackets must be on the outside */
  220. if (r->field[REQ_HOST][0] != '[' || *(q + 1) != '\0') {
  221. return http_send_status(fd, S_BAD_REQUEST);
  222. }
  223. /* remove the right bracket */
  224. *q = '\0';
  225. p = r->field[REQ_HOST] + 1;
  226. /* validate the contained IPv6 address */
  227. if (inet_pton(AF_INET6, p, &res) != 1) {
  228. return http_send_status(fd, S_BAD_REQUEST);
  229. }
  230. /* copy it into the host field */
  231. memmove(r->field[REQ_HOST], p, q - p + 1);
  232. }
  233. return 0;
  234. }
  /*
   * Percent-encode `src` into `dest` for use in a Location header.
   *
   * Escaped are control characters, bytes above 127, the space character
   * and '%' itself. Space would end the target in a request line, and a
   * literal '%' would otherwise be taken for an escape when the redirected
   * request is decoded again. All other bytes are copied unchanged.
   *
   * Output is cut off once fewer than four bytes of `dest` remain, so an
   * escape sequence plus the terminator always fit. `src` and `dest` must
   * not be the same buffer, since the output can be longer than the input.
   */
  static void
  encode(const char src[PATH_MAX], char dest[PATH_MAX])
  {
      static const char hexdigits[] = "0123456789ABCDEF";
      const char *s;
      size_t i;
      unsigned char c;

      for (s = src, i = 0; *s && i < (PATH_MAX - 4); s++) {
          /* <ctype.h> functions need an unsigned char value */
          c = (unsigned char)*s;
          if (iscntrl(c) || c > 127 || c == ' ' || c == '%') {
              dest[i++] = '%';
              dest[i++] = hexdigits[c >> 4];
              dest[i++] = hexdigits[c & 0x0F];
          } else {
              dest[i++] = (char)c;
          }
      }
      dest[i] = '\0';
  }
  /*
   * Normalize the absolute path `path` in place.
   *
   * Empty components ("//") and "." components are dropped, and a ".."
   * component removes the component before it (at the root it is simply
   * dropped). A trailing slash is preserved, and one is left behind when
   * the final component was dropped.
   *
   * Returns 0 on success and 1 when `path` does not start with '/'.
   */
  static int
  normabspath(char *path)
  {
      char *rd, *wr, *end;
      size_t clen;
      int final;

      /* the path has to be absolute */
      if (path[0] != '/') {
          return 1;
      }

      /*
       * rd walks over the input components, wr marks the end of the
       * normalized output built so far; wr never overtakes rd.
       */
      rd = wr = path + 1;

      for (;;) {
          /* the current component is [rd, end) */
          end = rd + strcspn(rd, "/");
          clen = (size_t)(end - rd);
          final = (*end == '\0');

          if (clen == 2 && rd[0] == '.' && rd[1] == '.') {
              /* "..": rewind the output to just after the previous '/' */
              if (wr != path + 1) {
                  for (wr -= 2; wr > path && *wr != '/'; wr--)
                      ;
                  wr++;
              }
          } else if (clen > 0 && !(clen == 1 && rd[0] == '.')) {
              /* ordinary component: keep it together with its slash */
              memmove(wr, rd, clen);
              wr += clen;
              if (!final) {
                  *wr++ = '/';
              }
          }
          /* "" and "." contribute nothing to the output */

          if (final) {
              break;
          }
          rd = end + 1;
      }
      *wr = '\0';

      return 0;
  }
  298. #undef RELPATH
  299. #define RELPATH(x) ((!*(x) || !strcmp(x, "/")) ? "." : ((x) + 1))
  300. enum status
  301. http_send_response(int fd, struct request *r)
  302. {
  303. struct in6_addr res;
  304. struct stat st;
  305. struct tm tm = { 0 };
  306. size_t len, i;
  307. off_t lower, upper;
  308. int hasport, ipv6host;
  309. static char realtarget[PATH_MAX], tmptarget[PATH_MAX], t[TIMESTAMP_LEN];
  310. char *p, *q, *mime;
  311. const char *vhostmatch, *targethost, *err;
  312. /* make a working copy of the target */
  313. memcpy(realtarget, r->target, sizeof(realtarget));
  314. /* match vhost */
  315. vhostmatch = NULL;
  316. if (s.vhost) {
  317. for (i = 0; i < s.vhost_len; i++) {
  318. /* switch to vhost directory if there is a match */
  319. if (!regexec(&s.vhost[i].re, r->field[REQ_HOST], 0,
  320. NULL, 0)) {
  321. if (chdir(s.vhost[i].dir) < 0) {
  322. return http_send_status(fd, (errno == EACCES) ?
  323. S_FORBIDDEN : S_NOT_FOUND);
  324. }
  325. vhostmatch = s.vhost[i].chost;
  326. break;
  327. }
  328. }
  329. if (i == s.vhost_len) {
  330. return http_send_status(fd, S_NOT_FOUND);
  331. }
  332. /* if we have a vhost prefix, prepend it to the target */
  333. if (s.vhost[i].prefix) {
  334. if (esnprintf(tmptarget, sizeof(tmptarget), "%s%s",
  335. s.vhost[i].prefix, realtarget)) {
  336. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  337. }
  338. memcpy(realtarget, tmptarget, sizeof(realtarget));
  339. }
  340. }
  341. /* apply target prefix mapping */
  342. for (i = 0; i < s.map_len; i++) {
  343. len = strlen(s.map[i].from);
  344. if (!strncmp(realtarget, s.map[i].from, len)) {
  345. /* match canonical host if vhosts are enabled and
  346. * the mapping specifies a canonical host */
  347. if (s.vhost && s.map[i].chost &&
  348. strcmp(s.map[i].chost, vhostmatch)) {
  349. continue;
  350. }
  351. /* swap out target prefix */
  352. if (esnprintf(tmptarget, sizeof(tmptarget), "%s%s",
  353. s.map[i].to, realtarget + len)) {
  354. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  355. }
  356. memcpy(realtarget, tmptarget, sizeof(realtarget));
  357. break;
  358. }
  359. }
  360. /* normalize target */
  361. if (normabspath(realtarget)) {
  362. return http_send_status(fd, S_BAD_REQUEST);
  363. }
  364. /* reject hidden target */
  365. if (realtarget[0] == '.' || strstr(realtarget, "/.")) {
  366. return http_send_status(fd, S_FORBIDDEN);
  367. }
  368. /* stat the target */
  369. if (stat(RELPATH(realtarget), &st) < 0) {
  370. return http_send_status(fd, (errno == EACCES) ?
  371. S_FORBIDDEN : S_NOT_FOUND);
  372. }
  373. if (S_ISDIR(st.st_mode)) {
  374. /* add / to target if not present */
  375. len = strlen(realtarget);
  376. if (len >= PATH_MAX - 2) {
  377. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  378. }
  379. if (len && realtarget[len - 1] != '/') {
  380. realtarget[len] = '/';
  381. realtarget[len + 1] = '\0';
  382. }
  383. }
  384. /* redirect if targets differ, host is non-canonical or we prefixed */
  385. if (strcmp(r->target, realtarget) || (s.vhost && vhostmatch &&
  386. strcmp(r->field[REQ_HOST], vhostmatch))) {
  387. /* encode realtarget */
  388. encode(realtarget, tmptarget);
  389. /* send redirection header */
  390. if (s.vhost) {
  391. /* absolute redirection URL */
  392. targethost = r->field[REQ_HOST][0] ? vhostmatch ?
  393. vhostmatch : r->field[REQ_HOST] : s.host ?
  394. s.host : "localhost";
  395. /* do we need to add a port to the Location? */
  396. hasport = s.port && strcmp(s.port, "80");
  397. /* RFC 2732 specifies to use brackets for IPv6-addresses
  398. * in URLs, so we need to check if our host is one and
  399. * honor that later when we fill the "Location"-field */
  400. if ((ipv6host = inet_pton(AF_INET6, targethost,
  401. &res)) < 0) {
  402. return http_send_status(fd,
  403. S_INTERNAL_SERVER_ERROR);
  404. }
  405. if (dprintf(fd,
  406. "HTTP/1.1 %d %s\r\n"
  407. "Date: %s\r\n"
  408. "Connection: close\r\n"
  409. "Location: //%s%s%s%s%s%s\r\n"
  410. "\r\n",
  411. S_MOVED_PERMANENTLY,
  412. status_str[S_MOVED_PERMANENTLY],
  413. timestamp(time(NULL), t),
  414. ipv6host ? "[" : "",
  415. targethost,
  416. ipv6host ? "]" : "", hasport ? ":" : "",
  417. hasport ? s.port : "", tmptarget) < 0) {
  418. return S_REQUEST_TIMEOUT;
  419. }
  420. } else {
  421. /* relative redirection URL */
  422. if (dprintf(fd,
  423. "HTTP/1.1 %d %s\r\n"
  424. "Date: %s\r\n"
  425. "Connection: close\r\n"
  426. "Location: %s\r\n"
  427. "\r\n",
  428. S_MOVED_PERMANENTLY,
  429. status_str[S_MOVED_PERMANENTLY],
  430. timestamp(time(NULL), t),
  431. tmptarget) < 0) {
  432. return S_REQUEST_TIMEOUT;
  433. }
  434. }
  435. return S_MOVED_PERMANENTLY;
  436. }
  437. if (S_ISDIR(st.st_mode)) {
  438. /* append docindex to target */
  439. if (esnprintf(realtarget, sizeof(realtarget), "%s%s",
  440. r->target, s.docindex)) {
  441. return http_send_status(fd, S_REQUEST_TOO_LARGE);
  442. }
  443. /* stat the docindex, which must be a regular file */
  444. if (stat(RELPATH(realtarget), &st) < 0 || !S_ISREG(st.st_mode)) {
  445. if (s.listdirs) {
  446. /* remove index suffix and serve dir */
  447. realtarget[strlen(realtarget) -
  448. strlen(s.docindex)] = '\0';
  449. return resp_dir(fd, RELPATH(realtarget), r);
  450. } else {
  451. /* reject */
  452. if (!S_ISREG(st.st_mode) || errno == EACCES) {
  453. return http_send_status(fd, S_FORBIDDEN);
  454. } else {
  455. return http_send_status(fd, S_NOT_FOUND);
  456. }
  457. }
  458. }
  459. }
  460. /* modified since */
  461. if (r->field[REQ_MOD][0]) {
  462. /* parse field */
  463. if (!strptime(r->field[REQ_MOD], "%a, %d %b %Y %T GMT", &tm)) {
  464. return http_send_status(fd, S_BAD_REQUEST);
  465. }
  466. /* compare with last modification date of the file */
  467. if (difftime(st.st_mtim.tv_sec, timegm(&tm)) <= 0) {
  468. if (dprintf(fd,
  469. "HTTP/1.1 %d %s\r\n"
  470. "Date: %s\r\n"
  471. "Connection: close\r\n"
  472. "\r\n",
  473. S_NOT_MODIFIED, status_str[S_NOT_MODIFIED],
  474. timestamp(time(NULL), t)) < 0) {
  475. return S_REQUEST_TIMEOUT;
  476. }
  477. return S_NOT_MODIFIED;
  478. }
  479. }
  480. /* range */
  481. lower = 0;
  482. upper = st.st_size - 1;
  483. if (r->field[REQ_RANGE][0]) {
  484. /* parse field */
  485. p = r->field[REQ_RANGE];
  486. err = NULL;
  487. if (strncmp(p, "bytes=", sizeof("bytes=") - 1)) {
  488. return http_send_status(fd, S_BAD_REQUEST);
  489. }
  490. p += sizeof("bytes=") - 1;
  491. if (!(q = strchr(p, '-'))) {
  492. return http_send_status(fd, S_BAD_REQUEST);
  493. }
  494. *(q++) = '\0';
  495. /*
  496. * byte-range=first\0last...
  497. * ^ ^
  498. * | |
  499. * p q
  500. */
  501. /*
  502. * make sure we only have a single range,
  503. * and not a comma separated list, which we
  504. * will refuse to accept out of spite towards
  505. * this horrible part of the spec
  506. */
  507. if (strchr(q, ',')) {
  508. goto not_satisfiable;
  509. }
  510. if (p[0] != '\0') {
  511. /*
  512. * Range has format "first-last" or "first-",
  513. * i.e. return bytes 'first' to 'last' (or the
  514. * last byte if 'last' is not given),
  515. * inclusively, and byte-numbering beginning at 0
  516. */
  517. lower = strtonum(p, 0, LLONG_MAX, &err);
  518. if (!err) {
  519. if (q[0] != '\0') {
  520. upper = strtonum(q, 0, LLONG_MAX,
  521. &err);
  522. } else {
  523. upper = st.st_size - 1;
  524. }
  525. }
  526. if (err) {
  527. /* one of the strtonum()'s failed */
  528. return http_send_status(fd, S_BAD_REQUEST);
  529. }
  530. /* check ranges */
  531. if (lower > upper || lower >= st.st_size) {
  532. goto not_satisfiable;
  533. }
  534. /* adjust upper limit to be at most the last byte */
  535. upper = MIN(upper, st.st_size - 1);
  536. } else {
  537. /*
  538. * Range has format "-num", i.e. return the 'num'
  539. * last bytes
  540. */
  541. /*
  542. * use upper as a temporary storage for 'num',
  543. * as we know 'upper' is st.st_size - 1
  544. */
  545. upper = strtonum(q, 0, LLONG_MAX, &err);
  546. if (err) {
  547. return http_send_status(fd, S_BAD_REQUEST);
  548. }
  549. /* determine lower */
  550. if (upper > st.st_size) {
  551. /* more bytes requested than we have */
  552. lower = 0;
  553. } else {
  554. lower = st.st_size - upper;
  555. }
  556. /* set upper to the correct value */
  557. upper = st.st_size - 1;
  558. }
  559. goto satisfiable;
  560. not_satisfiable:
  561. if (dprintf(fd,
  562. "HTTP/1.1 %d %s\r\n"
  563. "Date: %s\r\n"
  564. "Content-Range: bytes */%zu\r\n"
  565. "Connection: close\r\n"
  566. "\r\n",
  567. S_RANGE_NOT_SATISFIABLE,
  568. status_str[S_RANGE_NOT_SATISFIABLE],
  569. timestamp(time(NULL), t),
  570. st.st_size) < 0) {
  571. return S_REQUEST_TIMEOUT;
  572. }
  573. return S_RANGE_NOT_SATISFIABLE;
  574. satisfiable:
  575. ;
  576. }
  577. /* mime */
  578. mime = "application/octet-stream";
  579. if ((p = strrchr(realtarget, '.'))) {
  580. for (i = 0; i < sizeof(mimes) / sizeof(*mimes); i++) {
  581. if (!strcmp(mimes[i].ext, p + 1)) {
  582. mime = mimes[i].type;
  583. break;
  584. }
  585. }
  586. }
  587. return resp_file(fd, RELPATH(realtarget), r, &st, mime, lower, upper);
  588. }