- Notifications
You must be signed in to change notification settings - Fork 2.1k
Description
After a ngx.semaphore.wait() call, if the current request is terminated by Nginx due to client_header_timeout or client_body_timeout, the corresponding post() is never executed. As a result, all subsequent attempts to wait() on the same semaphore hang until they time out, because the semaphore count was never restored.
1. Reproduce by client_header_timeout on ssl_certificate_by_lua_block
#user nobody; worker_processes 1; daemon off; master_process off; error_log logs/debug.log debug; #pid logs/nginx.pid; events { worker_connections 1024; } http { lua_code_cache on; include mime.types; default_type application/octet-stream; log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; access_log logs/access.log main; sendfile on; #tcp_nopush on; #keepalive_timeout 0; keepalive_timeout 65; #gzip on; # Shared dictionary for the balancer lua_shared_dict my_balancer_dict 1m; init_worker_by_lua_block { local semaphore = require "ngx.semaphore" -- create a semaphore with capacity = 1 local sema, err = semaphore.new(1) if not sema then ngx.log(ngx.ERR, "failed to create semaphore: ", err) return end ssl_semaphore = sema } server { listen 8080; server_name localhost; location / { content_by_lua_block { ngx.sleep(10) ngx.say("Hello, world!") } } } server { listen 8443 ssl; server_name edge.foo.com; client_header_timeout 8; # your existing certs: ssl_certificate /home/barca/src/d26-sandbox/go/data/server.crt; ssl_certificate_key /home/barca/src/d26-sandbox/go/data/server.key; ssl_certificate_by_lua_block { local sema = ssl_semaphore local ok, err = sema:wait(5) if not ok then ngx.log(ngx.ERR, "SEMA] semaphore wait failed: ", err) return ngx.exit(ngx.ERROR) end ngx.log(ngx.INFO, "SEMA] acquired semaphore, sleeping 30s") local httpc = require "resty.http" local http_client, err = httpc.new() if not http_client then ngx.log(ngx.ERR, "failed to create http client: ", err) sema:post(1) return ngx.exit(ngx.ERROR) end --http_client:set_timeouts(10000, 10000, 10000) -- connect, send, read timeouts local request_uri_to_call = "http://127.0.0.1:8080/some/path" local res, err = http_client:request_uri(request_uri_to_call, { method = "GET", }) if not res then ngx.log(ngx.ERR, "failed to make HTTP request to ", request_uri_to_call, ": ", err) return ngx.exit(ngx.ERROR) end ngx.log(ngx.INFO, "HTTP request to ", request_uri_to_call, " completed with status: ", res.status) local ok, err = http_client:set_keepalive() if not ok then ngx.log(ngx.ERR, "failed to set keepalive: ", err) end local posted, perr = sema:post(1) if not posted then ngx.log(ngx.ERR, "SEMA] semaphore post failed: ", perr) else ngx.log(ngx.INFO, "SEMA] released semaphore") end } location / { return 200 "Hello, world!"; } } } The first request times out in the SSL phase and never calls post().
All subsequent requests hang until their wait() deadlines.
curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure curl -v -o /dev/null --connect-to edge.foo.com:443:127.0.0.1:8443 'https://edge.foo.com/' --insecure 2. Reproduce by client_body_timeout on access_by_lua_block
#user nobody; worker_processes 1; daemon off; master_process off; #error_log logs/error.log; #error_log logs/error.log notice; #error_log logs/error.log info; error_log logs/debug.log debug; #pid logs/nginx.pid; events { worker_connections 1024; } http { lua_code_cache on; include mime.types; default_type application/octet-stream; log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; access_log logs/access.log main; sendfile on; #tcp_nopush on; #keepalive_timeout 0; keepalive_timeout 65; #gzip on; # Shared dictionary for the balancer lua_shared_dict my_balancer_dict 1m; init_worker_by_lua_block { local semaphore = require "ngx.semaphore" local sema, err = semaphore.new(1) if not sema then ngx.log(ngx.ERR, "failed to create semaphore: ", err) return end upload_semaphore = sema } client_body_timeout 5s; server { listen 8000; server_name edge.foo.com; # on 408 jump here error_page 408 = @body_timeout; location /upload { access_by_lua_block { local sema = upload_semaphore local ok, err = sema:wait(10) if not ok then ngx.log(ngx.ERR, "SEMA] semaphore wait failed: ", err) return ngx.exit(500) end ngx.log(ngx.INFO, "SEMA] acquired semaphore, now reading body") ngx.req.read_body() local data = ngx.req.get_body_data() or "" sema:post(1) ngx.log(ngx.INFO, "SEMA] released semaphore") ngx.say("Got ", #data, " bytes of body") } } location @body_timeout { return 408 "Request Timeout: client body not received within 5s\n"; } } } The first slow upload triggers client_body_timeout and never calls post()
Subsequent uploads hang at wait()
( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 10; yes | head -c 100; ) | nc 127.0.0.1 8000 ( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 3; yes | head -c 100; ) | nc 127.0.0.1 8000 ( printf 'POST /upload HTTP/1.1\r\n'; printf 'Host: edge.foo.com\r\n'; printf 'Content-Length: 100\r\n'; printf 'Connection: close\r\n'; printf '\r\n'; sleep 3; yes | head -c 100; ) | nc 127.0.0.1 8000 Proposed Solution
Register a cleanup handler (as done in ngx_http_file_cache or ngx_http_lua_socket_tcp) when acquiring the semaphore. If the request is aborted, this handler should call sema:post() to restore the count.