这个LAB 是上完CMU CSAPP的21-25 LECTURE之后,就可以做了。
csapp 课程观看地址:https://search.bilibili.com/all?keyword=csapp&from_source=banner_search
lab 6 下载地址: http://csapp.cs.cmu.edu/3e/labs.html
选择PROXY LAB, 点击SELF-STUDY HANDOUT
恭喜你,已经来到了最后一个LAB。我的系列也已经到了尾声。纪念这一个月来的努力。把自己所有的CODE,放到了GITHUB。
https://github.com/yixuaz/csapp-labs
这次的作业主要分三个部分(详情参见WRITE-UP http://csapp.cs.cmu.edu/3e/proxylab.pdf ):
Sequential Proxy: 接收客户端发送的 HTTP 请求,解析之后向目标服务器转发,获得响应之后再转发回客户端
Concurrent Proxy: 在第一步的基础上,支持多线程
Cache Web Objects: 使用 LRU 缓存单独的对象,而不是整个页面
PART 1
第一部分,我的思考笔记如下。
第一步,看懂TINY SERVER(HANDOUT里赠送)的代码。 就大概知道如何写一个SERVER。
第二步,根据WRITE-UP 4 Part I: Implementing a sequential web proxy
大概需要做如下编程工作。服务器端接受请求,解析GET http://www.cmu.edu/hub/index.html HTTP/1.1
转换为 GET /hub/index.html HTTP/1.0
, 同时拿到HOST 和 PORT,代理服务器自己作为CLIENT向目标发送HTTP 1.0请求.
header 部分,先全部保持不变,随后改4个值,
分别为
Host: www.cmu.edu
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3
Connection: close
Proxy-Connection: close
转发之后,把接收到的响应作为代理服务器的输出,再转发给原客户端。
第一部分就大功告成。
第三步 代码实现
3.1 抄TINY SERVER的框架,把一些常量定义掉
#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
/* Fixed User-Agent header line appended to every forwarded request. */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
/* Fixed Connection header line appended to every forwarded request. */
static const char *conn_hdr = "Connection: close\r\n";
/* Fixed Proxy-Connection header line appended to every forwarded request. */
static const char *prox_hdr = "Proxy-Connection: close\r\n";
/* Forward declarations for the request-handling helpers defined below. */
void doit(int fd);
void clienterror(int fd, char *cause, char *errnum,
                 char *shortmsg, char *longmsg);
void parse_uri(char *uri, char *hostname, char *path, int *port);
/* Takes four arguments, matching the definition (hostname AND port). */
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char *port);
void *thread(void *vargp);

/*
 * main - listen on the port given as argv[1] and hand every accepted
 * connection to a freshly created thread.
 */
int main(int argc, char **argv)
{
    int listenfd, *connfd;
    pthread_t tid;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    struct sockaddr_storage clientaddr;

    /* Check command line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    /* A client that disconnects early must not kill the whole proxy. */
    signal(SIGPIPE, SIG_IGN);

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        printf("listening..\n");
        clientlen = sizeof(clientaddr);
        /* Heap-allocate the descriptor so each thread gets its own copy
         * and the next Accept cannot overwrite it. */
        connfd = Malloc(sizeof(int));
        *connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLINE,
                    port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfd);
    }
}

/*
 * thread - per-connection thread routine.
 *
 * vargp points at a heap-allocated int holding the connected descriptor;
 * the thread takes ownership of both the allocation and the descriptor.
 */
void *thread(void *vargp)
{
    int connfd = *((int *)vargp);

    Pthread_detach(pthread_self());
    Free(vargp);
    doit(connfd);
    Close(connfd);
    return NULL;
}

/*
 * doit - handle one HTTP request/response transaction
 *
 * Reads the request line from client_fd, forwards a rewritten HTTP/1.0 GET
 * request to the end server named in the URI, and relays the server's
 * response back to the client. client_fd is left open for the caller to
 * close; the end-server descriptor is opened and closed here.
 */
/* $begin doit */
void doit(int client_fd)
{
    int endserver_fd;
    int port;
    ssize_t n;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char hostname[MAXLINE], path[MAXLINE]; /* path e.g. /hub/index.html */
    char port_str[16];                     /* wide enough for any int */
    char newreq[MAXLINE];                  /* request sent to the end server */
    rio_t from_client, to_endserver;

    /* Read the request line; give up on EOF or read error. */
    Rio_readinitb(&from_client, client_fd);
    if (Rio_readlineb(&from_client, buf, MAXLINE) <= 0)
        return;

    /* Each token is shorter than buf, so the MAXLINE-sized targets cannot
     * overflow; the count check prevents reading unset buffers below. */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        clienterror(client_fd, buf, "400", "Bad Request",
                    "Proxy Server could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(client_fd, method, "501", "Not Implemented",
                    "Proxy Server does not implement this method");
        return;
    }

    /* Parse the URI, then connect to the end server. The outputs are
     * pre-cleared so a parser that leaves one untouched is detectable. */
    hostname[0] = '\0';
    path[0] = '\0';
    parse_uri(uri, hostname, path, &port);
    snprintf(port_str, sizeof port_str, "%d", port);
    endserver_fd = Open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        printf("connection failed\n");
        return;
    }
    Rio_readinitb(&to_endserver, endserver_fd);

    /* Request line, e.g. GET /hub/index.html HTTP/1.0; an empty path is
     * sent as "/" because a request line needs a request target. */
    snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n",
             path[0] != '\0' ? path : "/");
    build_requesthdrs(&from_client, newreq, hostname, port_str);
    Rio_writen(endserver_fd, newreq, strlen(newreq));

    /* Relay the response; n is the exact byte count, so binary bodies
     * pass through unchanged. Stop on EOF (0) or error (< 0). */
    while ((n = Rio_readlineb(&to_endserver, buf, MAXLINE)) > 0) {
        Rio_writen(client_fd, buf, n);
    }

    /* Release the end-server socket; without this every request leaks
     * one file descriptor. */
    Close(endserver_fd);
}
/* $end doit */
/*
 * clienterror - returns an error message to the client
 *
 * fd       connected client descriptor the response is written to
 * cause    the offending input (e.g. the unsupported method)
 * errnum   HTTP status code as text, e.g. "501"
 * shortmsg HTTP reason phrase
 * longmsg  human-readable explanation shown in the HTML body
 *
 * The body is formatted with a single bounded snprintf: passing a buffer
 * to sprintf as both destination and "%s" argument is undefined behavior,
 * and cause is client-controlled so it must not be able to overflow body.
 * An over-long body is truncated rather than overflowing.
 */
/* $begin clienterror */
void clienterror(int fd, char *cause, char *errnum,
                 char *shortmsg, char *longmsg)
{
    char buf[MAXLINE], body[MAXBUF];
    int body_len;

    /* Build the HTTP response body */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Proxy Error</title>"
                        "<body bgcolor=ffffff>\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Proxy Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0) {
        body[0] = '\0';
        body_len = 0;
    } else if ((size_t)body_len >= sizeof body) {
        /* snprintf reports the untruncated length; clamp to what fits. */
        body_len = (int)(sizeof body - 1);
    }

    /* Print the HTTP response */
    snprintf(buf, sizeof buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Content-length: %d\r\n\r\n", body_len);
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, (size_t)body_len);
}
/* $end clienterror */
3.2 实现2个辅助函数
在写PARSE URI方法前,我们得回顾下C 的STR的用法
https://www.cs.cmu.edu/~213/activities/cbootcamp/cbootcamp_s19.pdf
#ifndef MAXLINE
/* Fallback so this helper stands alone; the file gets MAXLINE from csapp.h.
 * NOTE(review): 8192 is the value in the stock CS:APP csapp.h -- confirm. */
#define MAXLINE 8192
#endif

/*
 * parse_uri - split a proxy request URI into host, port and path.
 *
 * uri      request target, e.g. "http://www.cmu.edu:8080/hub/index.html";
 *          read only, never modified
 * hostname out: host name without port; must hold MAXLINE bytes
 * path     out: everything from the first '/' after the host (query
 *          included), or "/" when the URI has no path; MAXLINE bytes
 * port     out: port from the URI, or 80 when absent or not in 1..65535
 *
 * A ':' only counts as the port separator when it appears before the first
 * '/' of the path, so colons inside the path or query string are left alone.
 * Over-long components are truncated; both outputs are always terminated.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port)
{
    const char *authority;   /* start of "host[:port]" */
    const char *path_start;  /* first '/' after the authority, or the NUL */
    const char *colon;
    size_t authority_len;
    size_t host_len;

    *port = 80;

    /* Skip the scheme ("http://") when there is one. */
    authority = strstr(uri, "//");
    authority = (authority != NULL) ? authority + 2 : uri;

    authority_len = strcspn(authority, "/");
    path_start = authority + authority_len;

    /* Look for the port separator inside the authority only. */
    colon = memchr(authority, ':', authority_len);
    host_len = (colon != NULL) ? (size_t)(colon - authority) : authority_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, authority, host_len);
    hostname[host_len] = '\0';

    if (colon != NULL) {
        char *end;
        long value = strtol(colon + 1, &end, 10);

        /* Keep the default when no digits follow ':' or the value is not
         * a valid TCP port. */
        if (end != colon + 1 && value > 0 && value <= 65535)
            *port = (int)value;
    }

    snprintf(path, MAXLINE, "%s", (*path_start != '\0') ? path_start : "/");
}
/* Return nonzero when header line `line` begins with the field name `name`
 * immediately followed by ':'. Field names are compared case-insensitively. */
static int header_named(const char *line, const char *name)
{
    size_t name_len = strlen(name);

    return strncasecmp(line, name, name_len) == 0 && line[name_len] == ':';
}

/*
 * build_requesthdrs - finish the request that is forwarded to the end server.
 *
 * rp       buffered reader positioned just after the client's request line
 * newreq   in/out: already holds the NUL-terminated request line
 *          ("GET <path> HTTP/1.0\r\n"); headers are appended in place.
 *          Assumed to be a MAXLINE-byte buffer, as in the caller.
 * hostname end-server host, used for the Host header
 * port     end-server port as text, used for the Host header
 *
 * The client's headers are copied through except Host, User-Agent,
 * Connection and Proxy-Connection, which are replaced by the proxy's own
 * values, followed by the terminating blank line. Appending goes through a
 * running offset: sprintf with newreq as both destination and argument is
 * undefined behavior and could overflow. A client header that would not
 * leave room for the mandatory headers is dropped whole, never cut in half.
 */
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port) {
    char buf[MAXLINE];
    char tail[MAXLINE];   /* mandatory headers + terminating blank line */
    size_t used = strlen(newreq);
    size_t tail_len;
    int n;

    n = snprintf(tail, sizeof tail, "Host: %s:%s\r\n%s%s%s\r\n",
                 hostname, port, user_agent_hdr, conn_hdr, prox_hdr);
    if (n < 0) {
        tail[0] = '\0';
        tail_len = 0;
    } else if ((size_t)n >= sizeof tail) {
        tail_len = sizeof tail - 1;
    } else {
        tail_len = (size_t)n;
    }

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        size_t line_len;

        /* A blank line ends the client's header section. */
        if (!strcmp(buf, "\r\n") || !strcmp(buf, "\n")) break;

        /* These four are always supplied by the proxy itself. */
        if (header_named(buf, "Host")) continue;
        if (header_named(buf, "User-Agent")) continue;
        if (header_named(buf, "Connection")) continue;
        if (header_named(buf, "Proxy-Connection")) continue;

        line_len = strlen(buf);
        if (used + line_len + tail_len >= MAXLINE) continue; /* no room */
        memcpy(newreq + used, buf, line_len + 1);
        used += line_len;
    }

    if (used < MAXLINE)
        snprintf(newreq + used, MAXLINE - used, "%s", tail);
}
3.3 测试
第一部分40分拿齐了。
PART 2
首先阅读ECHO MULTI THREAD的代码
http://www.cs.cmu.edu/afs/cs/academic/class/15213-f18/www/lectures/23-concprog.pdf
随后就根据PPT里的思路 用多线程的方式实现。
进行测试
依然PART 2
本来想写PART3了,但是突然发现,WRITE-UP里有2个HINT,我都没察觉到自己用过它们。随后就打算试试看自己的PROXY的健壮性,结果发现用浏览器测试时,连百度都上不去。
随后根据这篇博客,和一版新的HINT 对我的代码进行优化
https://www.keblog.me/2014/12/writing-proxy-lab-csapp/
依然PART 2.1 修改CSAPP.C做错误保护
这里一律注释掉
如果有错,一律return 0
设置方法
开PROXY SERVER前
开之后
依然PART 2.2 测试有没有File Descriptor泄漏
下面红框的,不应该存在,看来我有FD没有做释放。
在DOIT里面补上这个
PART 3
要实现CACHE的方法,
决定使用数组的方法,为了不浪费空间,决定采用分级数组的思想。(和MALLOC LAB很像)
因为最大缓存对象是100KB, 一共有1M的缓存空间。
我可以用5个100KB (500 KB)
25 KB 可以用12个。(300 KB)
随后10KB 可以用10个。 (100KB)
还有5KB的用20个,(100 KB)
1 KB 用 20个(20 KB)
100B的 用40个 (4KB)
第一步 定义数据结构
//cache.h
/*
 * Fixed-size object cache: TYPES size classes, each an array of equally
 * sized blocks. Guarded so that including it twice does not redefine the
 * struct types.
 */
#ifndef CACHE_H
#define CACHE_H

#include "csapp.h"
#include <sys/time.h>
#include <stdint.h>   /* int64_t */
#include <pthread.h>  /* pthread_rwlock_t */

/* Number of size classes. */
#define TYPES 6

/* Per-class block capacity in bytes and block count; defined in cache.c. */
extern const int cache_block_size[];
extern const int cache_cnt[];

/* One cached object. */
typedef struct cache_block{
    char* url;               /* lookup key; buffer allocated by init_cache */
    char* data;              /* object bytes; capacity is the class's block size */
    int datasize;            /* number of valid bytes in data */
    int64_t time;            /* last-use stamp in ms; 0 marks an unused block */
    pthread_rwlock_t rwlock; /* lock taken around accesses to this block */
} cache_block;

/* All blocks belonging to one size class. */
typedef struct cache_type{
    cache_block *cacheobjs;  /* array of `size` blocks */
    int size;                /* number of blocks in cacheobjs */
} cache_type;

/* NOTE(review): this defines the table in every translation unit that
 * includes the header, which fails to link when the compiler defaults to
 * -fno-common. Declaring it `extern` here and defining it once in cache.c
 * would fix that, but it needs a matching change in cache.c. */
cache_type caches[TYPES];

//intialize cache with malloc
void init_cache(void);
//if miss cache return 0, hit cache write content to fd
int read_cache(char* url, int fd);
//save value to cache
void write_cache(char* url, char* data, int len);
//free cache
void free_cache(void);

#endif /* CACHE_H */
第二步 实现方法
这里我们用了读者写者模式,并且根据提示,不用严格地按照LRU。这是什么意思呢?其实就是暗示我们在读的时候,需要去更新时间戳;如果有别的线程也在更新同一个CACHE BLOCK,那么就以那个线程的更新为准,TRY(trywrlock)失败了也不必强求。
//cache.c
#include "cache.h"
/* Capacity in bytes of one data block in each size class, in ascending order. */
const int cache_block_size[] = {102, 1024, 5120 ,10240,25600, 102400};
/* Number of blocks allocated for each size class, parallel to
 * cache_block_size; size * count summed over all classes is 1,048,560 bytes. */
const int cache_cnt[] = {40,20,20,10,12,5};
/* Current wall-clock time in milliseconds; used as the per-block use stamp. */
int64_t currentTimeMillis();
void init_cache()
{
int i = 0;
for (; i < TYPES; i++) {
caches[i].size = cache_cnt[i];
caches[i].cacheobjs
= (cache_block *)malloc(cache_cnt[i] * sizeof(cache_block));
cache_block *j = caches[i].cacheobjs;
int k;
for (k = 0; k < cache_cnt[i]; j++, k++) {
j->time = 0;
j->datasize = 0;
j->url = malloc(sizeof(char) * MAXLINE);
strcpy(j->url,"");
j->data = malloc(sizeof(char) * cache_block_size[i]);
memset(j->data,0,cache_block_size[i]);
pthread_rwlock_init(&j->rwlock,NULL);
}
}
}
/*
 * free_cache - release everything init_cache set up.
 *
 * For each size class: free every block's url and data buffers, destroy its
 * rwlock, then free the block array itself.
 */
void free_cache() {
    int type_idx;

    for (type_idx = 0; type_idx < TYPES; type_idx++) {
        cache_block *blocks = caches[type_idx].cacheobjs;
        cache_block *blk = blocks;
        int remaining = cache_cnt[type_idx];

        while (remaining > 0) {
            free(blk->url);
            free(blk->data);
            pthread_rwlock_destroy(&blk->rwlock);
            blk++;
            remaining--;
        }

        free(blocks);
    }
}
/*
 * read_cache - serve url from the cache when it is present.
 *
 * url  NUL-terminated lookup key
 * fd   descriptor the cached bytes are written to on a hit
 *
 * Returns 1 after writing the cached object to fd, 0 on a miss.
 *
 * Each block is compared and, on a match, sent while holding that block's
 * read lock, so a concurrent write_cache cannot replace the contents
 * between the check and the send. The use stamp is refreshed afterwards on
 * a best-effort basis: if the write lock is busy the refresh is skipped
 * (LRU is approximate), and it is also skipped when the block has been
 * reused for a different URL in the meantime.
 */
int read_cache(char *url,int fd){
    int tar, i;

    printf("read cache %s \n", url);
    for (tar = 0; tar < TYPES; tar++) {
        cache_block *p = caches[tar].cacheobjs;

        for (i = 0; i < caches[tar].size; i++, p++) {
            int hit;

            pthread_rwlock_rdlock(&p->rwlock);
            hit = (p->time != 0 && strcmp(url, p->url) == 0);
            if (hit)
                Rio_writen(fd, p->data, p->datasize);
            pthread_rwlock_unlock(&p->rwlock);

            if (!hit)
                continue;

            if (!pthread_rwlock_trywrlock(&p->rwlock)) {
                if (strcmp(url, p->url) == 0)
                    p->time = currentTimeMillis();
                pthread_rwlock_unlock(&p->rwlock);
            }
            printf("read cache successful\n");
            return 1;
        }
    }

    printf("read cache fail\n");
    return 0;
}
/*
 * write_cache - store one object in the smallest size class that fits it.
 *
 * url   NUL-terminated key; must be shorter than MAXLINE to be cacheable
 * data  object bytes
 * len   number of bytes in data
 *
 * Objects larger than the biggest block, negative lengths, NULL arguments
 * and over-long URLs are not cached (the function just returns). Within the
 * chosen class an unused block (time == 0) is preferred; otherwise the block
 * with the oldest use stamp is overwritten.
 */
void write_cache(char *url, char *data, int len){
    int tar = 0;
    int i;
    size_t url_len;
    cache_block *p;
    cache_block *victim = NULL;
    int64_t oldest = 0;

    if (url == NULL || data == NULL || len < 0)
        return;
    url_len = strlen(url);
    if (url_len >= MAXLINE)
        return;   /* a truncated key could never be matched reliably */

    while (tar < TYPES && len > cache_block_size[tar])
        tar++;
    if (tar == TYPES)
        return;   /* larger than the biggest block: indexing caches[] would overrun */
    printf("write cache %s %d\n", url, tar);

    /* Pick an empty block, else the least recently used one. The stamp is
     * read under the block's read lock to avoid racing with writers. */
    p = caches[tar].cacheobjs;
    for (i = 0; i < caches[tar].size; i++, p++) {
        int64_t stamp;

        pthread_rwlock_rdlock(&p->rwlock);
        stamp = p->time;
        pthread_rwlock_unlock(&p->rwlock);

        if (stamp == 0) {
            victim = p;
            break;
        }
        if (victim == NULL || stamp <= oldest) {
            oldest = stamp;
            victim = p;
        }
    }
    if (victim == NULL)
        return;   /* class has no blocks */

    pthread_rwlock_wrlock(&victim->rwlock);
    victim->time = currentTimeMillis();
    victim->datasize = len;
    /* Copy only the string itself (plus NUL), not a fixed MAXLINE bytes. */
    memcpy(victim->url, url, url_len + 1);
    memcpy(victim->data, data, (size_t)len);
    pthread_rwlock_unlock(&victim->rwlock);
    printf("write Cache\n");
}
/*
 * currentTimeMillis - current time of day as whole milliseconds.
 *
 * Combines the seconds and microseconds reported by gettimeofday into one
 * 64-bit millisecond count.
 */
int64_t currentTimeMillis() {
    struct timeval now;

    gettimeofday(&now, NULL);
    return (int64_t)(now.tv_sec) * 1000 + (now.tv_usec / 1000);
}
第三步 整合进现有CODE
3.1 修改MAKE FILE
3.2 增加INIT CACHE
第四步 测试
用浏览器测试前,需要BAN掉浏览器自带的CACHE。
这里我访问的是
http://home.baidu.com/home/index/contact_us
里面会加载很多资料,试了几次,基本都CACHE下来了。
测试是否内存泄漏
valgrind --leak-check=full --show-leak-kinds=all ./proxy 45161
只有一个我的代码无法控制的。