UCAS-Network-Lab-6-Router

Posted on 2023-12-28 Edited on 2024-02-13

折磨从这里开始，填坑也从这里开始。

$\text{0. 路由器所处的层次}$

路由器标志着~~这学期~~上学期计算机网络实验的第二阶段，也就是网络层。

在第一个阶段中，我们大部分时间都在数据链路层工作，这一部分最主要的功能就是根据目的MAC地址来转发数据包，然而，交换机只能提供有限的转发服务，无法实现更大规模的互联。

从网络层开始，就需要有对于IP地址的支持，这就需要不同网络的路由器不断根据IP地址来转发至目标网络。

$\text{1. ARP协议}$

一个前置条件是，我们需要能够根据IP地址来获取MAC地址，这就是ARP协议，其运作原理也非常简单，就是广播询问某个IP地址对应的MAC地址，然后等待回复，如果网络中的主机发现收到的ARP请求中的IP地址与自己的IP地址相同，就回复自己的MAC地址，这样当发送方收到回复时，就知道自己查询的IP地址对应的MAC地址是什么。

另一方面，我们不希望每次都要询问一次，所以我们需要一个ARP缓存，用来保存IP地址与MAC地址的对应关系，这样当我们需要发送数据包时，就可以直接从ARP缓存中获取目标MAC地址。

如果有遇到已知IP地址但是不知道MAC地址的情况，也需要将数据包pending起来放在缓存中。

此外，一般来说，网络里涉及到缓存就一定会有相应的老化机制，定期检查并且删除长时间没有使用的条目，这也是基本操作了。

$\text{2. 最长前缀匹配}$

仅有ip地址并不能唯一标识一个网络，还需要mask才能够确定，这个确定的关系就是最长前缀匹配。

ip地址以及mask唯一地标识了一个网络，具体而言，如果一个ip地址与转发表中的多个ip都有公共的前缀，那么我们就需要选择最长的那个前缀，这样才能够确定唯一的一个网络。

转发表给出了这样标识的网络与端口的对应关系，这样当我们收到一个数据包时，就可以根据目的ip地址按照最长前缀匹配原则来查找转发表，确定转发的端口。

$\text{3. 路由器的转发}$

转发一个ip数据包的过程并不算十分复杂，数据包的IP头部会给出目的ip地址，我们首先需要根据这个ip地址来查找转发表，确定转发的端口，然后要将数据包的TTL字段减一并且判断是否为0，如果为0则丢弃数据包，否则就将数据包转发到对应的端口。

同时，要修改数据包的ETHERNET头部，将源MAC地址改为自己的MAC地址，目的MAC地址改为下一个路由器的MAC地址，然后发送数据包。另一种理解就是，IP协议是建立在更底层的协议之上，IP协议描述了“全局”上传输的源和目标，而ETHERNET协议描述了“局域”上传输的源和目标，一次ETHERNET传输就是整个传输过程的一个环节。

如果由于任何原因导致数据包无法发送至目的地，就需要发送ICMP协议的错误报文，这样发送方就能够得知数据包的传输失败。ICMP协议与IP协议是在同一层面上的东西，因此实现起来也大同小异。

$\text{4. 实现}$

本次实验代码很多，每个模块内部，模块之间的交互都很容易出错。

首先是arp协议部分


void arp_send_request(iface_info_t *iface, u32 dst_ip)
{
	log(DEBUG, "send arp request to "IP_FMT , LE_IP_FMT_STR(dst_ip));
	char* packet = (char *)malloc(ETHER_HDR_SIZE + sizeof(struct ether_arp));
	struct ether_header *eh = (struct ether_header *)packet;
	memset(eh->ether_dhost, 0xff, ETH_ALEN);
	memcpy(eh->ether_shost, iface->mac, ETH_ALEN);
	eh->ether_type = htons(ETH_P_ARP);
	struct ether_arp *arp_hdr = (struct ether_arp *)(packet + ETHER_HDR_SIZE);
	arp_hdr->arp_hrd = htons(ARPHRD_ETHER);
	arp_hdr->arp_pro = htons(ETH_P_IP);
	arp_hdr->arp_hln = ETH_ALEN;
	arp_hdr->arp_pln = 4;
	arp_hdr->arp_op = htons(ARPOP_REQUEST);
	memcpy(arp_hdr->arp_sha, iface->mac, ETH_ALEN);
	memset(arp_hdr->arp_tha, 0, ETH_ALEN);
	arp_hdr->arp_spa = htonl(iface->ip);
	arp_hdr->arp_tpa = htonl(dst_ip);
	iface_send_packet(iface, packet, ETHER_HDR_SIZE + sizeof(struct ether_arp));
}

发送部分其实没什么好说，就是填充各个字段，然后发送。

void arp_send_reply(iface_info_t *iface, struct ether_arp *req_hdr)
{
	log(DEBUG, "send arp reply to "IP_FMT" through %s", NET_IP_FMT_STR(req_hdr->arp_spa), iface->name);
	char* packet = (char *)malloc(ETHER_HDR_SIZE + sizeof(struct ether_arp));
	struct ether_header *eh = (struct ether_header *)packet;
	memcpy(eh->ether_shost, iface->mac, ETH_ALEN);
	memcpy(eh->ether_dhost, req_hdr->arp_sha, ETH_ALEN);
	eh->ether_type = htons(ETH_P_ARP);
	struct ether_arp *arp_hdr = (struct ether_arp *)(packet + ETHER_HDR_SIZE);
	arp_hdr->arp_hrd = htons(ARPHRD_ETHER);
	arp_hdr->arp_pro = htons(ETH_P_IP);
	arp_hdr->arp_hln = ETH_ALEN;
	arp_hdr->arp_pln = 4;
	arp_hdr->arp_op = htons(ARPOP_REPLY);
	memcpy(arp_hdr->arp_sha, iface->mac, ETH_ALEN);
	arp_hdr->arp_spa = htonl(iface->ip);
	memcpy(arp_hdr->arp_tha, req_hdr->arp_sha, ETH_ALEN);
	arp_hdr->arp_tpa = req_hdr->arp_spa;
	iface_send_packet(iface, packet, ETHER_HDR_SIZE + sizeof(struct ether_arp));
}

这是回复的代码，需要注意的就是将端口的MAC地址填充到数据包中，将发送来的包的源mac地址填充到目的mac地址中。

void handle_arp_packet(iface_info_t *iface, char *packet, int len)
{
	struct ether_arp *arp_hdr = (struct ether_arp *)(packet + ETHER_HDR_SIZE);
	if (ntohs(arp_hdr->arp_op) == ARPOP_REQUEST) {
		if (ntohl(arp_hdr->arp_tpa) == iface->ip) {
			arp_send_reply(iface, arp_hdr);
			return ;
		}
		log(DEBUG, "received arp packet not for me");
	}
	else if (ntohs(arp_hdr->arp_op) == ARPOP_REPLY) {
		log(DEBUG, "received arp reply from"IP_FMT", caching it", NET_IP_FMT_STR(arp_hdr->arp_spa));
		arpcache_insert(ntohl(arp_hdr->arp_spa), arp_hdr->arp_sha);
	}
	else {
		log(DEBUG, "received arp packet not request or reply, arp fail");
		icmp_send_packet(packet, len, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH);
	}
	free(packet);
}

void iface_send_packet_by_arp(iface_info_t *iface, u32 dst_ip, char *packet, int len)
{
	struct ether_header *eh = (struct ether_header *)packet;
	eh->ether_type = htons(ETH_P_IP);

	u8 dst_mac[ETH_ALEN];
	int found = arpcache_lookup(dst_ip, dst_mac);
	if (found) {
		log(DEBUG, "found mac for "IP_FMT" in arpcache, send packet", NET_IP_FMT_STR(dst_ip));
		memcpy(eh->ether_shost, iface->mac, ETH_ALEN);
		memcpy(eh->ether_dhost, dst_mac, ETH_ALEN);
		iface_send_packet(iface, packet, len);
	} else {
		log(DEBUG, "not found mac for "IP_FMT" in arpcache, pending packet", NET_IP_FMT_STR(dst_ip));
		arpcache_append_packet(iface, dst_ip, packet, len);
		arp_send_request(iface, dst_ip);	
	}
}

接下来看arp缓存的部分，这个其实就很直观。

int arpcache_lookup(u32 ip4, u8 mac[ETH_ALEN])
{
	// lookup the cache for the entry
	pthread_mutex_lock(&arpcache.lock);
	for (int i = 0; i < MAX_ARP_SIZE; i++) {
		if (arpcache.entries[i].valid && arpcache.entries[i].ip4 == ip4) {
			memcpy(mac, arpcache.entries[i].mac, ETH_ALEN);
			pthread_mutex_unlock(&arpcache.lock);
			return 1;
		}
	}
	pthread_mutex_unlock(&arpcache.lock);
	return 0;
}

void arpcache_append_packet(iface_info_t *iface, u32 ip4, char *packet, int len)
{
	// append the ip address if there is no entry for it
	pthread_mutex_lock(&arpcache.lock);
	struct arp_req *req_entry = NULL;
	list_for_each_entry(req_entry, &(arpcache.req_list), list) {
		if (req_entry->iface == iface && req_entry->ip4 == ip4) {
			log(DEBUG, "append packet to existing arp request, iface %s, ip "IP_FMT, iface->name, NET_IP_FMT_STR(ip4));
			struct cached_pkt *pkt_entry = (struct cached_pkt *)malloc(sizeof(struct cached_pkt));
			pkt_entry->packet = deep_copy(packet, len);
			pkt_entry->len = len;
			list_add_tail(&(pkt_entry->list), &(req_entry->cached_packets));
			pthread_mutex_unlock(&arpcache.lock);
			return;
		}
	}
	log(DEBUG, "create new pending arp request, iface %s, ip "IP_FMT, iface->name, NET_IP_FMT_STR(ip4));
	req_entry = (struct arp_req *)malloc(sizeof(struct arp_req));
	req_entry->iface = iface;
	req_entry->ip4 = ip4;
	init_list_head(&(req_entry->cached_packets));
	struct cached_pkt *pkt_entry = (struct cached_pkt *)malloc(sizeof(struct cached_pkt));
	pkt_entry->packet = deep_copy(packet, len);
	pkt_entry->len = len;
	list_add_tail(&(pkt_entry->list), &(req_entry->cached_packets));
	list_add_tail(&(req_entry->list), &(arpcache.req_list));
	pthread_mutex_unlock(&arpcache.lock);
}

插入缓存时，首先看能不能在已有的缓存项中找到并且替换，然后从pending的列表中找出在等待这个ip地址的数据包，发送出去。

// insert the IP->mac mapping into arpcache, if there are pending packets
// waiting for this mapping, fill the ethernet header for each of them, and send
// them out
void arpcache_insert(u32 ip4, u8 mac[ETH_ALEN])
{
	// insert the ip->mac mapping into the cache
	pthread_mutex_lock(&arpcache.lock);
	int replace = 0;
	for (int i = 0; i < MAX_ARP_SIZE; i++) {
		if (!arpcache.entries[i].valid) {
			arpcache.entries[i].ip4 = ip4;
			memcpy(arpcache.entries[i].mac, mac, ETH_ALEN);
			arpcache.entries[i].added = time(NULL);
			arpcache.entries[i].valid = 1;
			replace = 1;
			break;
		}
	}
	if (!replace) {
		// replace an entry at random
		int i = rand() % MAX_ARP_SIZE;
		arpcache.entries[i].ip4 = ip4;
		memcpy(arpcache.entries[i].mac, mac, ETH_ALEN);
		arpcache.entries[i].added = time(NULL);
		arpcache.entries[i].valid = 1;
	}
	struct arp_req *req_entry = NULL, *req_q;
	// send the pending packets
	list_for_each_entry_safe(req_entry, req_q, &(arpcache.req_list), list) {
		if (req_entry->ip4 == ip4) {
			struct cached_pkt *pkt_entry = NULL, *pkt_q;
			log(DEBUG, "send pending packet to "IP_FMT, NET_IP_FMT_STR(ip4));
			list_for_each_entry_safe(pkt_entry, pkt_q, &(req_entry->cached_packets), list) {
				struct ether_header *eh = (struct ether_header *)(pkt_entry->packet);
				memcpy(eh->ether_dhost, mac, ETH_ALEN);
				iface_send_packet(req_entry->iface, pkt_entry->packet, pkt_entry->len);
				list_delete_entry(&(pkt_entry->list));
				free(pkt_entry);
			}
			list_delete_entry(&(req_entry->list));
			free(req_entry);
		}
	}
	pthread_mutex_unlock(&arpcache.lock);
}

对于老化操作，定时扫描整个缓存，并且对于还在pending的数据包重传一次，当重传次数超过一定次数时，就丢弃这个数据包，并且回复ICMP包表示找不到目的主机。

void *arpcache_sweep(void *arg) 
{
	while (1) {
		sleep(1);
		// sweep the arp cache
		pthread_mutex_lock(&arpcache.lock);
		log(DEBUG, "sweeping arpcache");
		for (int i = 0; i < MAX_ARP_SIZE; i++) 
			if (arpcache.entries[i].valid && time(NULL) - arpcache.entries[i].added > ARP_ENTRY_TIMEOUT) 
				arpcache.entries[i].valid = 0;
		struct arp_req *req_entry = NULL, *req_q;
		list_for_each_entry_safe(req_entry, req_q, &(arpcache.req_list), list) {
			if (time(NULL) - req_entry->sent >= 1) {
				if (req_entry->retries >= ARP_REQUEST_MAX_RETRIES) {
					struct cached_pkt *pkt_entry = NULL, *pkt_q;
					list_for_each_entry_safe(pkt_entry, pkt_q, &(req_entry->cached_packets), list) {
						log(DEBUG, "arp request retries exceeded, send icmp to " IP_FMT, NET_IP_FMT_STR(req_entry->ip4));
						icmp_send_packet(pkt_entry->packet, pkt_entry->len, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH);
						list_delete_entry(&(pkt_entry->list));
						free(pkt_entry->packet);
						free(pkt_entry);
					}
					list_delete_entry(&(req_entry->list));
					free(req_entry);
				}
				else {
					log(DEBUG, "retransmit arp request to " IP_FMT, NET_IP_FMT_STR(req_entry->ip4));
					arp_send_request(req_entry->iface, req_entry->ip4);
					req_entry->sent = time(NULL);
					req_entry->retries++;
				}
			}
		}
		pthread_mutex_unlock(&arpcache.lock);
	}

	return NULL;
}

接下来是处理IP协议的部分

void handle_ip_packet(iface_info_t *iface, char *packet, int len) {
	struct iphdr *ip = packet_to_ip_hdr(packet);
	if (ntohl(ip->daddr) == iface->ip) {
		if (ip->protocol == IPPROTO_ICMP) {
			struct icmphdr *icmp = (struct icmphdr *)((char *)ip + IP_HDR_SIZE(ip));
			if (icmp->type == ICMP_ECHOREQUEST) {
				log(DEBUG, "receive ICMP_ECHOREQUEST, send ICMP_ECHOREPLY");
				icmp_send_packet(packet, len, ICMP_ECHOREPLY, 0);
				free(packet);
				return;
			}
		} else {
			log(DEBUG, "not ICMP_ECHOREQUEST, drop packet");
			free(packet);
			return;
		}
	}
	if (ip->ttl <= 1) {
		log(DEBUG, "TTL <= 1, reply ICMP_TIME_EXCEEDED");
		icmp_send_packet(packet, len, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL);
		free(packet);
		return;
	}
	ip->ttl--;
	log(DEBUG, "Receive packet from "IP_FMT", to "IP_FMT", ttl = %d", NET_IP_FMT_STR(ip->saddr), NET_IP_FMT_STR(ip->daddr), ip->ttl);
	ip->checksum = ip_checksum(ip);
	rt_entry_t *entry = longest_prefix_match(ntohl(ip->daddr));
	if (entry == NULL) {
		log(DEBUG, "no matching entry, reply ICMP_DEST_UNREACH");
		icmp_send_packet(packet, len, ICMP_DEST_UNREACH, ICMP_NET_UNREACH);
		free(packet);
		return;
	}
	// match, forward the packet
	iface_info_t *dest_iface = entry->iface;
	if (dest_iface == NULL) {
		log(DEBUG, "no matching iface, reply ICMP_DEST_UNREACH");
		icmp_send_packet(packet, len, ICMP_DEST_UNREACH, ICMP_NET_UNREACH);
		free(packet);
		return;
	}
	// log the MAC address of the src
	ip_forward_packet(dest_iface, packet, len);	
}

逻辑还是很直观，这里多了一个对于ICMP包的处理，其实也很直观，我们需要的ICMP包大体上只有3种，对于ECHO请求，我们需要回复ECHO回复，对于TTL为0的包，我们需要回复TTL超时，对于找不到匹配的路由表项，我们需要回复目的不可达。

这里涉及到的一些基础函数在ip_base.c中，代码如下

rt_entry_t *longest_prefix_match(u32 dst)
{
	// lookup the routing table to find the entry with the same and longest prefix
	rt_entry_t *entry = NULL, *longest_entry = NULL;
	list_for_each_entry(entry, &rtable, list) {
		if ((dst & entry->mask) == (entry->dest & entry->mask)) {
			if (longest_entry == NULL || entry->mask > longest_entry->mask) {
				longest_entry = entry;
			}
		}
	}
	return longest_entry;
}

void ip_send_packet(char *packet, int len)
{
	struct iphdr* ip = packet_to_ip_hdr(packet);
	rt_entry_t* entry = longest_prefix_match(ntohl(ip->daddr));
	assert(entry != NULL);
	iface_info_t* iface = entry->iface;
	assert(iface != NULL);
	ip->saddr = htonl(iface->ip);
	ip->checksum = ip_checksum(ip);
	log(DEBUG, "send icmp packet to "IP_FMT" from " IP_FMT, NET_IP_FMT_STR(ip->daddr), NET_IP_FMT_STR(ip->saddr));
	iface_send_packet(iface, packet, len);
}

对于本实验而言，ip_send_packet只会用于发送ICMP包，其他情况下发送都是使用的iface_send_packet_by_arp。

说起来其实很简单，但是实际实现的时候，细节颇多：

要注意到iface_send_packet发送的包都会被释放掉，因此不可保存其指针，只可以将其复制一份。
采用C语言实现的网络实验中永远的一个大难题是要小心存储顺序，这点上一定要仔细，最好能自己对于各种接口通过各种方式明确约束。我在实际编写和调试的时候，在这里踩了无数无数的坑，因为彼此的函数调用关系错综复杂，随便一个地方不注意搞反了就会出现奇怪的现象。
要注意老化/多次重发中对于时间，以及重发次数的比较是否带等号，时间上稍微一个写错就有可能错过一个周期。

$\text{5. 总结}$

这次实验的难度在于，代码量很大，而且模块之间的交互很多，调试也相当困难，这次实验耗时也很大，但是收获很多，以前并不知道IP地址为什么如此重要，也不知道为什么网络能够连接到如此远的地方，这次实验真正让我意识到了IP地址在计算机网络中是一个什么样的地位。

0. 路由器所处的层次\text{0. 路由器所处的层次}0. 路由器所处的层次

1. ARP协议\text{1. ARP协议}1. ARP协议

2. 最长前缀匹配\text{2. 最长前缀匹配}2. 最长前缀匹配

3. 路由器的转发\text{3. 路由器的转发}3. 路由器的转发

4. 实现\text{4. 实现}4. 实现

5. 总结\text{5. 总结}5. 总结