Introduction
There are many C++ network libraries. Handy is one written in a C++11 style, and reading its source is very helpful for learning C++ in depth.
Code Analysis
Below is the epoll.cc file from handy/raw_examples. It is an example of level-triggered (LT) epoll: an HTTP server that returns the same static resource (a body that starts with “123456”) regardless of what request it receives. Compile it with c++ -o epoll epoll.cc and run it with sudo ./epoll. In the original source, the if (con.writeEnabled) check in sendRes appears to be faulty and causes problems when sending large resources; I have modified it so that large responses are sent correctly.
/*
* Compilation: c++ -o epoll epoll.cc
* Execution: sudo ./epoll (binding to port 80 requires root)
* Testing: curl -v localhost
*/
/*
Running Effect
Run the epoll program with sudo. It listens on port 80 on all local addresses (0.0.0.0) and acts as an HTTP server.
Every complete request received on a connection is answered with the static resource httpRes.
Level-triggered (LT) is epoll's default mode, and it is the mode used here.
*/
#include <sys/socket.h>
#include <sys/epoll.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <map>
#include <string>
#include <signal.h>
#include <iostream>
using namespace std;
// Switch for the verbose per-event log lines; main() sets it to false when any command-line argument is given.
bool output_log = true;
// exit_if(cond, fmt, ...): if `cond` is true, print the printf-style message,
// then the source location together with errno and its description, and
// terminate the process with exit status 1.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to a
// single statement and can be used safely in an unbraced if/else. errno is
// saved before the first printf because printf itself may modify it.
#define exit_if(r, ...)                                                      \
    do {                                                                     \
        if (r) {                                                             \
            const int exit_if_saved_errno_ = errno;                          \
            printf(__VA_ARGS__);                                             \
            printf("%s:%d error no: %d error msg %s\n", __FILE__, __LINE__,  \
                   exit_if_saved_errno_, strerror(exit_if_saved_errno_));    \
            exit(1);                                                         \
        }                                                                    \
    } while (0)
// Put the file descriptor `fd` into non-blocking mode.
// Exits the process (via exit_if) if either fcntl call fails.
void setNonBlock(int fd) {
    // Fetch the current file status flags so that only O_NONBLOCK is added
    // and every other flag is preserved.
    const int currentFlags = fcntl(fd, F_GETFL, 0);
    exit_if(currentFlags<0, "fcntl failed");

    // Store the flags back with O_NONBLOCK switched on.
    const int rc = fcntl(fd, F_SETFL, currentFlags | O_NONBLOCK);
    exit_if(rc<0, "fcntl failed");
}
// Thin wrapper around epoll_ctl.
//
// efd:    the epoll instance.
// fd:     the descriptor being registered or modified; it is also stored in
//         ev.data.fd so the event loop can tell which descriptor fired.
// events: the event mask to watch for (e.g. EPOLLIN, EPOLLIN|EPOLLOUT).
// op:     the epoll_ctl operation; logged as "mod" for EPOLL_CTL_MOD and as
//         "add" for anything else.
//
// Exits the process (via exit_if) if epoll_ctl returns non-zero.
void updateEvents(int efd, int fd, int events, int op) {
    struct epoll_event ev;
    memset(&ev, 0, sizeof ev);
    ev.data.fd = fd;
    ev.events = events;

    const char* opName = (op == EPOLL_CTL_MOD) ? "mod" : "add";
    printf("%s fd %d events read %d write %d\n",
           opName, fd, ev.events & EPOLLIN, ev.events & EPOLLOUT);

    const int rc = epoll_ctl(efd, op, fd, &ev);
    exit_if(rc, "epoll_ctl failed");
}
// Accept one pending connection on the listening socket `fd`, switch the new
// socket to non-blocking mode and register it with the epoll instance `efd`
// for read events (EPOLLIN).
//
// The listening socket is non-blocking, so accept() can fail for reasons that
// are not fatal to the server: no connection is pending any more
// (EAGAIN/EWOULDBLOCK), the peer aborted the connection before it was
// accepted (ECONNABORTED), or the call was interrupted by a signal (EINTR).
// Those cases are logged and skipped; with level-triggered epoll the listener
// is reported again if a connection is still pending. Any other accept()
// failure terminates the process via exit_if.
//
// accept() already fills in the peer address, so no separate getpeername()
// call is needed.
void handleAccept(int efd, int fd) {
    struct sockaddr_in raddr;
    socklen_t rsz = sizeof(raddr);
    int cfd = accept(fd, reinterpret_cast<struct sockaddr *>(&raddr), &rsz);
    if (cfd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK ||
                    errno == ECONNABORTED || errno == EINTR)) {
        printf("accept on fd %d returned no connection: %d %s\n", fd, errno, strerror(errno));
        return;
    }
    exit_if(cfd<0, "accept failed");
    printf("accept a connection from %s\n", inet_ntoa(raddr.sin_addr));
    setNonBlock(cfd);
    updateEvents(efd, cfd, EPOLLIN, EPOLL_CTL_ADD);
}
// State kept for one client connection.
//
// std::string stores its length explicitly, so it is safe to keep binary data
// (including embedded \0 bytes) in it; see https://www.zhihu.com/question/33104941
struct Con {
    string readed;              // request bytes received so far
    size_t written = 0;         // number of response bytes already sent
    bool writeEnabled = false;  // true while EPOLLOUT is registered for this connection
};
// Per-connection state, keyed by the connection's file descriptor.
// Entries are created on first access through operator[].
map<int, Con> cons;
// The complete HTTP response (headers followed by body) that is sent for every request; filled in by main().
string httpRes;
// Send as much of the still-unsent part of httpRes to `fd` as the socket
// will accept right now.
//
// efd: the epoll instance `fd` is registered with.
// fd:  the client socket; progress is tracked in cons[fd].written.
//
// Outcomes:
//  - The whole response has been sent: EPOLLOUT interest is dropped again (if
//    it had been enabled) and the connection state is erased. The socket is
//    left open, because keep-alive is used in testing; it is closed from the
//    read path once the peer closes the connection.
//  - The kernel send buffer is full (EAGAIN/EWOULDBLOCK): EPOLLOUT is
//    registered, so the event loop calls back when the socket is writable
//    again and sending resumes from cons[fd].written.
//  - Any other failure: the socket is closed and the connection state erased.
//
// A write() interrupted by a signal (EINTR) is simply retried; it is not a
// connection error.
void sendRes(int efd, int fd) {
    Con& con = cons[fd];

    // Number of response bytes that still have to be written.
    size_t left = httpRes.length() - con.written;

    // Keep writing until everything is sent or the kernel refuses more data.
    ssize_t wd = 0;
    while (left > 0) {
        wd = ::write(fd, httpRes.data() + con.written, left);
        if (wd > 0) {
            con.written += static_cast<size_t>(wd);
            left -= static_cast<size_t>(wd);
            if (output_log) printf("write %zd bytes left: %zu\n", wd, left);
            continue;
        }
        if (wd < 0 && errno == EINTR) {
            continue;  // interrupted before any data was transferred: retry
        }
        break;  // EAGAIN/EWOULDBLOCK or a real error; errno is still intact
    }

    // Everything has been sent.
    if (left == 0) {
        // Stop watching for writability, otherwise level-triggered epoll
        // would keep reporting EPOLLOUT for an idle, writable socket.
        if (con.writeEnabled) {
            updateEvents(efd, fd, EPOLLIN, EPOLL_CTL_MOD);
        }
        // Forget the request/response state but keep the socket open.
        cons.erase(fd);
        return;
    }

    // The kernel send buffer is full: wait until the socket is writable again.
    if (wd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        // writeEnabled avoids a redundant epoll_ctl call when EPOLLOUT is
        // already registered.
        if (!con.writeEnabled) {
            printf("update it to EPOLLIN|EPOLLOUT\n");
            updateEvents(efd, fd, EPOLLIN|EPOLLOUT, EPOLL_CTL_MOD);
            con.writeEnabled = true;
        }
        return;
    }

    // Data is still pending but write() returned 0 or failed with another
    // error: give up on this connection.
    printf("write error for %d: %d %s\n", fd, errno, strerror(errno));
    close(fd);
    cons.erase(fd);
}
// Read handler called by loop_once when a client socket is reported readable.
//
// efd: the epoll instance `fd` is registered with (passed on to sendRes).
// fd:  the client socket.
//
// Reads everything currently available in 4 KiB chunks and appends it to
// cons[fd].readed. Whenever the accumulated data ends with an empty line
// ("\n\n" or "\r\n\r\n") the request is treated as complete and the static
// response is sent; the request content itself is never inspected.
//
// The function returns with the connection left open when read() reports
// EAGAIN/EWOULDBLOCK (nothing more to read for now). When read() returns 0
// (the peer closed the connection) or fails with any other error, the socket
// is closed and its state erased. A read() interrupted by a signal (EINTR) is
// retried instead of being treated as an error.
void handleRead(int efd, int fd) {
    char buf[4096];
    ssize_t n = 0;
    for (;;) {
        n = ::read(fd, buf, sizeof buf);
        if (n < 0 && errno == EINTR) {
            continue;  // interrupted before any data arrived: retry
        }
        if (n <= 0) {
            break;  // EOF, EAGAIN/EWOULDBLOCK or a real error; errno is still intact
        }
        if (output_log) printf("read %zd bytes\n", n);

        // operator[] default-constructs the Con entry on first use.
        string& readed = cons[fd].readed;
        // Pass the length explicitly so embedded \0 bytes are kept.
        readed.append(buf, static_cast<size_t>(n));
        std::cout << "now info is" << std::endl << "---" << readed << endl << "---" << std::endl;

        // A header block ends with an empty line; with no entity body that is
        // also the end of the request.
        const size_t len = readed.length();
        if (len > 4 &&
            (readed.compare(len - 2, 2, "\n\n") == 0 ||
             readed.compare(len - 4, 4, "\r\n\r\n") == 0)) {
            // Start sending; if the socket buffer fills up, sendRes registers
            // EPOLLOUT and the remainder is written later.
            // `readed` must not be used after this call: sendRes may erase
            // cons[fd].
            // NOTE(review): sendRes closes fd itself on a write error, and
            // this loop then still reads from (and finally closes) that
            // descriptor again.
            sendRes(efd, fd);
        }
    }

    // Nothing more to read right now; wait for the next readable event.
    if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        printf("nothing to read from %d, return. \n", fd);
        return;
    }
    if (n < 0) {
        printf("read %d error: %d %s\n", fd, errno, strerror(errno));
    }
    // Reached when n == 0 (the peer closed the connection) or after a read
    // error: close our side as well and drop the connection state.
    printf("%d close the connection\n", fd);
    close(fd);
    cons.erase(fd);
}
// Write handler called by loop_once when a client socket is reported writable (EPOLLOUT).
// It delegates to sendRes, which sends the prepared static response httpRes,
// continuing from the number of bytes recorded in cons[fd].written.
void handleWrite(int efd, int fd) {
sendRes(efd, fd);
}
// Run one iteration of the event loop.
//
// efd:    the epoll instance.
// lfd:    the listening socket; readiness on it means a connection can be accepted.
// waitms: maximum time to block in epoll_wait, in milliseconds.
//
// Waits for up to kMaxEvents ready descriptors and dispatches each of them to
// handleAccept, handleRead or handleWrite. An epoll_wait interrupted by a
// signal (EINTR) just ends this iteration; any other epoll_wait failure
// terminates the process via exit_if.
void loop_once(int efd, int lfd, int waitms) {
    // At most this many events are copied out of the kernel per call.
    const int kMaxEvents = 20;
    struct epoll_event activeEvs[kMaxEvents];
    const int n = epoll_wait(efd, activeEvs, kMaxEvents, waitms);
    if (n < 0) {
        exit_if(errno != EINTR, "epoll_wait failed");
        return;
    }
    // n is the number of ready descriptors (0 on timeout).
    if(output_log) printf("epoll_wait return %d\n", n);
    for (int i = 0; i < n; i ++) {
        const int fd = activeEvs[i].data.fd;
        const int events = activeEvs[i].events;
        // EPOLLERR and EPOLLHUP are always reported by epoll, even though they
        // were never requested. They are routed through the read path so that
        // read() reports the end-of-file or the pending error and handleRead
        // closes the connection, instead of the whole server exiting on an
        // "unknown event".
        // See: http://man7.org/linux/man-pages/man2/epoll_ctl.2.html
        if (events & (EPOLLIN | EPOLLERR | EPOLLHUP)) {
            if (fd == lfd) {
                printf("this is accept\n");
                handleAccept(efd, fd);
            } else {
                printf("this can read\n");
                handleRead(efd, fd);
            }
        } else if (events & EPOLLOUT) {
            // The socket can take more data. In level-triggered mode this is
            // reported for as long as the socket stays writable, which is why
            // EPOLLOUT is only registered while a response is partially sent.
            // See https://www.zhihu.com/question/22840801
            if(output_log) printf("handling epollout\n");
            handleWrite(efd, fd);
        } else {
            exit_if(1, "unknown event");
        }
    }
}
int main(int argc, const char* argv[]) {
if (argc > 1) { output_log = false; }
/*
Small Knowledge
signal (parameter 1, parameter 2);
Parameter 1: The signal we want to process. We can view the system signals (64 in total) by typing kill -l in the terminal. In fact, these signals are macros defined by the system.
Parameter 2: The way we handle it (system default, ignore, or capture). SIG_IGN: If the func parameter is set to SIG_IGN, the signal will be ignored.
*/
::signal(SIGPIPE, SIG_IGN);
// Set the content of the HTTP response
httpRes = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 19048576\r\n\r\n123456";
// Fill the rest of the content with 0. The final length of content is about 1024*1024
for(int i=0;i<19048570;i++) {
httpRes+='\0';
}
// Set the port to 80
short port = 80;
// Create an epoll handle
int epollfd = epoll_create(1);
exit_if(epollfd < 0, "epoll_create failed");
// Create a socket
int listenfd = socket(AF_INET, SOCK_STREAM, 0);
exit_if(listenfd < 0, "socket failed");
struct sockaddr_in addr;
memset(&addr, 0, sizeof addr);
addr.sin_family = AF_INET;
addr.sin_port = htons(port);
addr.sin_addr.s_addr = INADDR_ANY;
// First bind the socket to the port
int r = ::bind(listenfd,(struct sockaddr *)&addr, sizeof(struct sockaddr));
// This step will report an error if you don't have superuser permissions. Linux doesn't allow non-root users to use ports below 1024
exit_if(r, "bind to 0.0.0.0:%d failed %d %s", port, errno, strerror(errno));
/*
#include<sys/socket.h>
int listen(int sockfd, int backlog)
Return: 0──success, -1──failure
Parameter sockfd
The socket that the listen function acts on, sockfd was previously returned by the socket function. At the time when the socket function returns the socket fd, it is an active connection socket,
which means the system assumes the user will call the connect function on this socket, expecting it to actively connect with other processes, then in server programming, the user wants this socket to accept external connection requests,
that is, passively wait for users to connect. Since the system assumes by default that a socket is actively connected, it needs to be told in some way, and the user process completes this by making the system call listen.
Parameter backlog
This parameter involves some network details. While a process is handling one connection request, there may be other connection requests.
Because TCP connection is a process, there may be a half-connected state, and sometimes due to too many users trying to connect simultaneously, the server process cannot quickly complete the connection request.
If this situation occurs, how does the server process want the kernel to handle it?
The kernel will maintain a queue in its own process space to track these completed connections that the server process has not yet handled or is processing. Such a queue cannot be arbitrarily large in the kernel,
so there must be an upper limit to its size. This backlog tells the kernel to use this value as the upper limit.
Without a doubt, the server process cannot arbitrarily specify a value, the kernel has a permissible range. This range is implementation-related. It's hard to have some standardization, usually this value will be less than 30.
The length of the queue used by the kernel to track these completed connections but not yet accepted by user code is set to 20 here. When the queue length is less than 20, the kernel will immediately complete the connection establishment.
But if the queue length is greater than 20, the connection will not be established before the user code calls accept, and the other party will be in a blocked state.
*/
r = listen(listenfd, 20);
exit_if(r, "listen failed %d %s", errno, strerror(errno));
printf("fd %d listening at %d\n", listenfd, port);
// Next, set the file descriptor to non-blocking.
// Why set it to non-blocking? https://www.zhihu.com/question/23614342
setNonBlock(listenfd);
// Set it to trigger when readable, add to the epoll file descriptor pool
updateEvents(epollfd, listenfd, EPOLLIN, EPOLL_CTL_ADD);
for (;;) { // Actual applications should register signal handling functions and clean up resources when exiting
loop_once(epollfd, listenfd, 10000);
}
return 0;
}
Running Effect
sudo ./epoll
fd 4 listening at 80
add fd 4 events read 1 write 0
epoll_wait return 1
this is accept
accept a connection from 127.0.0.1
add fd 5 events read 1 write 0
epoll_wait return 1
this can read
read 412 bytes
now info is
---GET / HTTP/1.1
Host: 127.0.0.1
Connection: keep-alive
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
Upgrade-Insecure-Requests: 1
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,und;q=0.8,zh-TW;q=0.7,en;q=0.6,pl;q=0.5
---
write 4081834 bytes left: 14966851
update it to EPOLLIN|EPOLLOUT
mod fd 5 events read 1 write 4
nothing to read from 5, return.
epoll_wait return 1
handling epollout
write 2226422 bytes left: 12740429
epoll_wait return 1
handling epollout
write 2095456 bytes left: 10644973
epoll_wait return 1
handling epollout
write 1964490 bytes left: 8680483
epoll_wait return 1
handling epollout
write 1506109 bytes left: 7174374
epoll_wait return 1
handling epollout
write 1833524 bytes left: 5340850
epoll_wait return 1
handling epollout
write 1637075 bytes left: 3703775
write 130966 bytes left: 3572809
epoll_wait return 1
handling epollout
write 1571592 bytes left: 2001217
epoll_wait return 1
handling epollout
write 1440626 bytes left: 560591
epoll_wait return 1
handling epollout
write 560591 bytes left: 0
mod fd 5 events read 1 write 0
epoll_wait return 1
this can read
read 375 bytes
now info is
---GET /favicon.ico HTTP/1.1
Host: 127.0.0.1
Connection: keep-alive
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
Accept: image/webp,image/apng,image/*,*/*;q=0.8
Referer: http://127.0.0.1/
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,und;q=0.8,zh-TW;q=0.7,en;q=0.6,pl;q=0.5
---
write 10477280 bytes left: 8571405
update it to EPOLLIN|EPOLLOUT
mod fd 5 events read 1 write 4
nothing to read from 5, return.
epoll_wait return 1
handling epollout
write 1440626 bytes left: 7130779
epoll_wait return 1
handling epollout
write 1768041 bytes left: 5362738
epoll_wait return 1
handling epollout
write 1571592 bytes left: 3791146
epoll_wait return 1
handling epollout
write 1637075 bytes left: 2154071
epoll_wait return 1
handling epollout
write 1702558 bytes left: 451513
epoll_wait return 1
handling epollout
write 451513 bytes left: 0
mod fd 5 events read 1 write 0
epoll_wait return 0
epoll_wait return 0
epoll_wait return 0
Here I increased the size of the resource, changing it to the following value:
httpRes = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 19048576\r\n\r\n123456";
// Pad the rest of the body with \0 bytes. The final body length is 19048576 bytes (about 18 MiB), matching Content-Length
for(int i=0;i<19048570;i++) {
httpRes+='\0';
}
You can see that the response was transmitted in multiple parts. In the end, the browser page displays 123456; the \0 bytes that follow it are not displayed. You can also see that the browser made two requests: one for the root path and one for the page icon, favicon.ico.