forked from gigablast/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HttpServer.h
229 lines (182 loc) · 7.25 KB
/
HttpServer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
// Copyright Matt Wells Nov 2000
// . derived from TcpServer
// . fill in our own getMsgSize () -- looks for Content-Length:xxx
// . fill in our own getMsgPiece() -- looks on disk
// . fill in our own putMsgPiece() -- ??? for spidering big files!
// . everything else is just a generic non-blocking i/o system
// . move data from one file/mem to another file/mem that might be remote
//
//TODO: handle SIGPIPE!! use sigaction() ...
//TODO: first packet should carry some file data, not just the MIME hdr (avoids TCP delayed ACKs)
// TODO: look into TCP_CORK: it delays sending a packet until it's full,
// which improves performance quite a bit. unsetting TCP_CORK flushes it.
// TODO: investigate sendfile() (copies data between file descriptors)
#ifndef _HTTPSERVER_H_
#define _HTTPSERVER_H_
//#define BGCOLOR "89e3A9" // green
#define BGCOLOR "ffffff" // white
//#define BGCOLOR "d0cfc0" // gray
//#define BGCOLOR "d0d0d9" // blue gray
//#define BGCOLOR "d0cfd0" // gray
//#define BGCOLOR "d6ced6" // bluish gray
#define MAX_DOWNLOADS (MAX_TCP_SOCKS-50)
#include "TcpServer.h"
#include "Url.h"
#include "HttpRequest.h" // for parsing/forming HTTP requests
#include "HttpMime.h"
//this is for low priority requests which come in while we are
//in a quickpoll
#define MAX_REQUEST_QUEUE 128
// . one deferred HTTP request
// . per the note above MAX_REQUEST_QUEUE these are low priority requests
//   that come in while we are in a quickpoll
struct QueuedRequest {
	// the HTTP request itself (stored by value)
	HttpRequest  m_r;
	// the socket that goes with the request
	TcpSocket   *m_s;
	// NOTE(review): presumably the "page" value handed to
	// HttpServer::addToQueue() -- confirm against the .cpp
	long         m_page;
};
// . pointer to a callback taking an opaque state pointer and a TcpSocket
// . same shape as the "callback" parameters of HttpServer::getDoc()
typedef void (*tcp_callback_t)(void *, TcpSocket *);
// . per the notes at the top of this file, the HTTP version of getMsgSize()
//   looks for "Content-Length:xxx"
// . NOTE(review): the meaning of the return value is defined in the .cpp
//   file -- confirm before relying on it
long getMsgSize ( char *buf , long bufSize , TcpSocket *s );
// NOTE(review): defined outside this header; presumably generates the
// "add event" page for request "r" on socket "s" -- confirm
bool sendPageAddEvent ( TcpSocket *s , HttpRequest *r );
// . HTTP layer built on top of TcpServer
// . contains two TcpServers: m_tcp and m_ssltcp
// . can fetch documents from other servers (getDoc) and send replies to
//   incoming requests (sendReply, sendDynamicPage, sendErrorReply)
class HttpServer {

 public:

	// reset the tcp server
	void reset();

	// . returns false if initialization was unsuccessful
	// . "handlerWrapper" is optional and defaults to NULL
	bool init ( short port,
		    short sslPort ,
		    void handlerWrapper ( TcpSocket *s) = NULL);

	// . returns false if blocked, true otherwise
	// . sets errno on error
	// . supports partial gets with "offset" and "size"
	// . IMPORTANT: we free read/send bufs of TcpSocket after callback
	// . IMPORTANT: if you don't like this set s->m_read/sendBuf to NULL
	//              in your callback function
	// . NOTE: this should always block unless errno is set
	// . the TcpSocket's callbackData is a file ptr
	// . replies MUST fit in memory (we have NOT implemented putMsgPiece())
	// . uses the HTTP partial GET command if size is > 0
	// . uses regular GET if size is -1
	// . otherwise uses the HTTP HEAD command
	// . the document will be in the s->m_readBuf/s->m_bytesRead of "s"
	// . use Mime class to help parse the readBuf
	// . timeout is in milliseconds since last read OR write
	// . this now ensures that the read content is NULL terminated!
	bool getDoc ( char   *url      , // Url *url ,
		      long    ip       ,
		      long    offset   ,
		      long    size     ,
		      time_t  ifModifiedSince ,
		      void   *state    ,
		      void   (* callback) ( void *state , TcpSocket *s ) ,
		      long    timeout  , // 60*1000
		      long    proxyIp  ,
		      short   proxyPort,
		      long    maxTextDocLen  ,
		      long    maxOtherDocLen ,
		      char   *userAgent = NULL ,
		      //bool  respectDownloadLimit = false ,
		      // . say HTTP/1.1 instead of 1.0 so we can communicate
		      //   with room alert...
		      // . we do not support 1.1 that is why you should always
		      //   use 1.0
		      char   *proto = "HTTP/1.0" ,
		      bool    doPost = false ,
		      char   *cookie = NULL );

	// . like above, but the caller supplies the ip, port and an
	//   already-formed request of "requestLen" bytes
	bool getDoc ( long ip,
		      long port,
		      char *request,
		      long requestLen,
		      void *state ,
		      void (* callback)( void *state , TcpSocket *s ) ,
		      long timeout ,
		      long maxTextDocLen ,
		      long maxOtherDocLen );
		      //bool respectDownloadLimit = false );

	// NOTE(review): presumably invoked when a download started by
	// getDoc() completes, "n" selecting the slot in states[]/callbacks[]
	// -- confirm against the .cpp
	bool gotDoc ( long n , TcpSocket *s );

	// just make a request with size set to 0 and it'll do a HEAD request
	/*
	bool getMime ( char   *url      ,
		       long    timeout  ,
		       long    proxyIp  ,
		       short   proxyPort ,
		       void   *state    ,
		       void   (* callback) ( void *state , TcpSocket *s )) {
		return getDoc (url,0,0,0,state,callback,
			       timeout,proxyIp,proxyPort,-1,-1); };
	*/

	// . this is public so requestHandlerWrapper() can call it
	// . if it returns false "s" will be destroyed w/o a reply
	void requestHandler ( TcpSocket *s );

	// send an error reply, like "HTTP/1.1 404 Not Found"
	bool sendErrorReply ( TcpSocket *s , long error , char *errmsg ,
			      long *bytesSent = NULL );

	// send a "prettier" error reply, formatted in XML if necessary
	bool sendQueryErrorReply ( TcpSocket *s , long error , char *errmsg,
				   long rawFormat, int errnum,
				   char *content=NULL);

	// these are for stopping annoying seo bots
	void getKey  ( long *key, char *kname,
		       char *q , long qlen , long now , long s , long n ) ;
	void getKeys ( long *key1, long *key2, char *kname1, char *kname2,
		       char *q , long qlen , long now , long s , long n ) ;
	bool hasPermission ( long ip , HttpRequest *r ,
			     char *q , long qlen , long s , long n ) ;

	// . used by the HttpPageX.h classes after making their dynamic content
	// . returns false if blocked, true otherwise
	// . sets errno on error
	// . a cacheTime of -2 means browser should not cache when user
	//   is clicking forward or hitting back button OR anytime -- no cache!
	// . a cacheTime of -1 means browser should not cache when user
	//   is clicking forward, but caching when clicking back button is ok
	// . a cacheTime of 0 tells browser to use local caching rules
	bool sendDynamicPage ( TcpSocket *s , char *page , long pageLen ,
			       long cacheTime = -1 , bool POSTReply = false ,
			       char *contentType = NULL,
			       long httpStatus = -1,
			       char *cookie = NULL,
			       char *charset = NULL ,
			       HttpRequest *hr = NULL );

	// for PageSockets
	TcpServer *getTcp()    { return &m_tcp; }
	TcpServer *getSSLTcp() { return &m_ssltcp; }

	// we contain our own tcp server
	TcpServer m_tcp;
	TcpServer m_ssltcp;

	// . cancel the transaction that had this state
	// . only m_tcp is asked to cancel; m_ssltcp is not touched here
	void cancel ( void *state ) {
		//void (*callback)(void *state, TcpSocket *s) ) {
		m_tcp.cancel ( state );//, callback );
	}

	long m_maxOpenSockets;

	//for content-encoding: gzip, we unzip the reply and edit the
	//header to reflect the new size and encoding
	TcpSocket *unzipReply(TcpSocket* s);

	// . ratio of m_uncompressedBytes to m_bytesDownloaded
	// . returns 0.0 if nothing has been downloaded yet; dividing by a
	//   zero m_bytesDownloaded would be undefined behavior and in
	//   practice yields NaN or inf
	float getCompressionRatio() {
		if ( m_bytesDownloaded == 0 ) return 0.0f;
		return (float)m_uncompressedBytes/m_bytesDownloaded;
	}

	//this is for low priority requests which come in while we are
	//in a quickpoll
	bool addToQueue(TcpSocket *s, HttpRequest *r, long page);
	bool callQueuedPages();

	// private:

	// like above but you supply the ip
	bool sendRequest ( long   ip       ,
			   short  port     ,
			   char  *request  ,
			   void  *state    ,
			   void  (* callback) ( void *state , TcpSocket *s ));

	// go ahead and start sending the file ("path") over the socket
	bool sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin);

	// . send the given mime and content over socket "s"
	// . NOTE(review): presumably "alreadyCompressed" means the content
	//   must not be compressed again -- confirm against the .cpp
	bool sendReply2 ( char *mime,
			  long  mimeLen ,
			  char *content ,
			  long  contentLen ,
			  TcpSocket *s ,
			  bool alreadyCompressed = false ,
			  HttpRequest *hr = NULL) ;

	// . one state pointer and one callback per download slot
	// . NOTE(review): presumably indexed by the "n" given to gotDoc()
	//   -- confirm
	void          *states   [MAX_DOWNLOADS];
	tcp_callback_t callbacks[MAX_DOWNLOADS];

	// byte counters read by getCompressionRatio()
	long m_bytesDownloaded;
	long m_uncompressedBytes;

	//QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
	//long  m_lastSlotUsed;
};
// the global HttpServer instance; only declared here, the definition
// lives in another translation unit
extern class HttpServer g_httpServer;
#endif