Regular "Gateway Timeout" errors from Mattermost / Traefik

I'm hosting Mattermost. While it works, the service is super unstable. Every couple of minutes I receive a 504 error from the HTTP service from traefik. After a couple of seconds it works again. Using a terminal in the machine, I can do "curl http://localhost:8065" and it works. So the service is running and the issue stems from traefik. The system is not used, it's just me testing it. So it's not a load issue. What can I do to analyze / fix the issue? This is the error I get in the logs:
{"ClientAddr":"XXX.XXX.XXX.XXX:9898","ClientHost":"XXX.XXX.XXX.XXX","ClientPort":"9898","ClientUsername":"-","DownstreamContentSize":15,"DownstreamStatus":504,"Duration":30001375870,"OriginContentSize":15,"OriginDuration":30001161933,"OriginStatus":504,"Overhead":213937,"RequestAddr":"mattermost.MYDOMAIN","RequestContentSize":0,"RequestCount":26533,"RequestHost":"mattermost.MYDOMAIN","RequestMethod":"POST","RequestPath":"/api/v4/channels/members/me/view","RequestPort":"-","RequestProtocol":"HTTP/2.0","RequestScheme":"https","RetryAttempts":0,"RouterName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceAddr":"172.20.0.3:8065","ServiceName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceURL":"http://172.20.0.3:8065","StartLocal":"2024-11-07T11:29:48.512383719Z","StartUTC":"2024-11-07T11:29:48.512383719Z","TLSCipher":"TLS_AES_128_GCM_SHA256","TLSVersion":"1.3","entryPointName":"websecure","level":"info","msg":"","time":"2024-11-07T11:30:18Z"}
{"ClientAddr":"XXX.XXX.XXX.XXX:9898","ClientHost":"XXX.XXX.XXX.XXX","ClientPort":"9898","ClientUsername":"-","DownstreamContentSize":15,"DownstreamStatus":504,"Duration":30001375870,"OriginContentSize":15,"OriginDuration":30001161933,"OriginStatus":504,"Overhead":213937,"RequestAddr":"mattermost.MYDOMAIN","RequestContentSize":0,"RequestCount":26533,"RequestHost":"mattermost.MYDOMAIN","RequestMethod":"POST","RequestPath":"/api/v4/channels/members/me/view","RequestPort":"-","RequestProtocol":"HTTP/2.0","RequestScheme":"https","RetryAttempts":0,"RouterName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceAddr":"172.20.0.3:8065","ServiceName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceURL":"http://172.20.0.3:8065","StartLocal":"2024-11-07T11:29:48.512383719Z","StartUTC":"2024-11-07T11:29:48.512383719Z","TLSCipher":"TLS_AES_128_GCM_SHA256","TLSVersion":"1.3","entryPointName":"websecure","level":"info","msg":"","time":"2024-11-07T11:30:18Z"}
5 Replies
johannes0910
johannes0910OP6mo ago
This is the (standard) docker-compose I use to set it up:
# https://docs.docker.com/compose/environment-variables/
#
# Compose stack for Mattermost with a PostgreSQL backend.
# Values written as ${...} are substituted by Compose from the environment
# or an .env file (see env.example).

version: "2.4"

services:
  postgres:
    image: postgres:${POSTGRES_IMAGE_TAG}
    restart: always
    security_opt:
      - no-new-privileges:true
    # pids_limit: 100
    read_only: true
    # writable scratch locations needed because the root filesystem is read-only
    tmpfs:
      - /tmp
      - /var/run/postgresql
    volumes:
      - ${POSTGRES_DATA_PATH}:/var/lib/postgresql/data
    environment:
      # timezone inside container
      - TZ

      # necessary Postgres options/variables
      # List-style entries must be KEY=VALUE. "KEY:VALUE" is parsed as a single
      # variable *name* with no value, so nothing would reach the container.
      - POSTGRES_USER=mmuser
      - POSTGRES_PASSWORD=mmuser_password
      - POSTGRES_DB=mattermost
    networks:
      - app-network

  mattermost:
    depends_on:
      - postgres
    image: mattermost/${MATTERMOST_IMAGE}:${MATTERMOST_IMAGE_TAG}
    restart: always
    security_opt:
      - no-new-privileges:true
    # pids_limit: 200
    read_only: ${MATTERMOST_CONTAINER_READONLY}
    tmpfs:
      - /tmp
    volumes:
      - ${MATTERMOST_CONFIG_PATH}:/mattermost/config:rw
      - ${MATTERMOST_DATA_PATH}:/mattermost/data:rw
      - ${MATTERMOST_LOGS_PATH}:/mattermost/logs:rw
      - ${MATTERMOST_PLUGINS_PATH}:/mattermost/plugins:rw
      - ${MATTERMOST_CLIENT_PLUGINS_PATH}:/mattermost/client/plugins:rw
      - ${MATTERMOST_BLEVE_INDEXES_PATH}:/mattermost/bleve-indexes:rw
    environment:
      # timezone inside container
      - TZ

      # Mattermost options/variables (see env.example)
      # Bare names are passed through from the environment Compose runs in.
      - MM_SQLSETTINGS_DRIVERNAME
      - MM_SQLSETTINGS_DATASOURCE

      - MM_BLEVESETTINGS_INDEXDIR

      - MM_SERVICESETTINGS_SITEURL
    networks:
      - app-network

networks:
  # NOTE(review): per the replies in this thread, routing through Traefik under
  # Dokploy was only reliable once the services were attached to
  # dokploy-network instead of this custom bridge network.
  app-network:
    driver: bridge
# https://docs.docker.com/compose/environment-variables/
#
# Compose stack for Mattermost with a PostgreSQL backend.
# Values written as ${...} are substituted by Compose from the environment
# or an .env file (see env.example).

version: "2.4"

services:
  postgres:
    image: postgres:${POSTGRES_IMAGE_TAG}
    restart: always
    security_opt:
      - no-new-privileges:true
    # pids_limit: 100
    read_only: true
    # writable scratch locations needed because the root filesystem is read-only
    tmpfs:
      - /tmp
      - /var/run/postgresql
    volumes:
      - ${POSTGRES_DATA_PATH}:/var/lib/postgresql/data
    environment:
      # timezone inside container
      - TZ

      # necessary Postgres options/variables
      # List-style entries must be KEY=VALUE. "KEY:VALUE" is parsed as a single
      # variable *name* with no value, so nothing would reach the container.
      - POSTGRES_USER=mmuser
      - POSTGRES_PASSWORD=mmuser_password
      - POSTGRES_DB=mattermost
    networks:
      - app-network

  mattermost:
    depends_on:
      - postgres
    image: mattermost/${MATTERMOST_IMAGE}:${MATTERMOST_IMAGE_TAG}
    restart: always
    security_opt:
      - no-new-privileges:true
    # pids_limit: 200
    read_only: ${MATTERMOST_CONTAINER_READONLY}
    tmpfs:
      - /tmp
    volumes:
      - ${MATTERMOST_CONFIG_PATH}:/mattermost/config:rw
      - ${MATTERMOST_DATA_PATH}:/mattermost/data:rw
      - ${MATTERMOST_LOGS_PATH}:/mattermost/logs:rw
      - ${MATTERMOST_PLUGINS_PATH}:/mattermost/plugins:rw
      - ${MATTERMOST_CLIENT_PLUGINS_PATH}:/mattermost/client/plugins:rw
      - ${MATTERMOST_BLEVE_INDEXES_PATH}:/mattermost/bleve-indexes:rw
    environment:
      # timezone inside container
      - TZ

      # Mattermost options/variables (see env.example)
      # Bare names are passed through from the environment Compose runs in.
      - MM_SQLSETTINGS_DRIVERNAME
      - MM_SQLSETTINGS_DATASOURCE

      - MM_BLEVESETTINGS_INDEXDIR

      - MM_SERVICESETTINGS_SITEURL
    networks:
      - app-network

networks:
  # NOTE(review): per the replies in this thread, routing through Traefik under
  # Dokploy was only reliable once the services were attached to
  # dokploy-network instead of this custom bridge network.
  app-network:
    driver: bridge
The last uptime checks ... lots of short downtimes
Up 2024-11-07 12:42:45 200 - OK
Down 2024-11-07 12:41:15 Request failed with status code 504
Up 2024-11-07 12:35:15 200 - OK
Down 2024-11-07 12:33:45 Request failed with status code 504
Up 2024-11-07 12:30:37 200 - OK
Down 2024-11-07 12:29:07 Request failed with status code 504
Up 2024-11-07 12:25:07 200 - OK
Down 2024-11-07 12:23:37 Request failed with status code 504
Up 2024-11-07 12:14:36 200 - OK
Down 2024-11-07 12:13:06 Request failed with status code 504
Up 2024-11-07 12:42:45 200 - OK
Down 2024-11-07 12:41:15 Request failed with status code 504
Up 2024-11-07 12:35:15 200 - OK
Down 2024-11-07 12:33:45 Request failed with status code 504
Up 2024-11-07 12:30:37 200 - OK
Down 2024-11-07 12:29:07 Request failed with status code 504
Up 2024-11-07 12:25:07 200 - OK
Down 2024-11-07 12:23:37 Request failed with status code 504
Up 2024-11-07 12:14:36 200 - OK
Down 2024-11-07 12:13:06 Request failed with status code 504
All other containers work fine, so it's just Mattermost having issues. I'm on the latest Dokploy version, and a system restart did not help. After a system reboot, Mattermost requests to the public domain failed with 504, while the terminal and curl to localhost worked fine and there were no apparent errors. Redeploying and rebuilding didn't help. I then did server, space, clean all (in Dokploy). The machine became available and has been running fine for 30 minutes now.
Siumauricio
Siumauricio6mo ago
Did you use dokploy-network in each service?
johannes0910
johannes0910OP6mo ago
Aaaand now it's gone again and not coming up, so it wasn't the Docker cache. See the docker-compose file above; I had to create an app-network (for some reason) to get it to work. This is the docker inspect of the Mattermost image: https://pastecode.io/s/z4iik55m Gateway Timeout on the public domain, but it works via terminal on localhost. Traefik log:
{"ClientAddr":"XXXXX:9809","ClientHost":"XXXXX","ClientPort":"9809","ClientUsername":"-","DownstreamContentSize":15,"DownstreamStatus":504,"Duration":30001269707,"OriginContentSize":15,"OriginDuration":30001013701,"OriginStatus":504,"Overhead":256006,"RequestAddr":"mattermost.MYDOMAIN","RequestContentSize":0,"RequestCount":4820,"RequestHost":"mattermost.MYDOMAIN","RequestMethod":"GET","RequestPath":"/api/v4/websocket?connection_id=ie8djs8zwbbnmq8m7t47j8oo9e\u0026sequence_number=11\u0026posted_ack=true","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"https","RetryAttempts":0,"RouterName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceAddr":"172.20.0.3:8065","ServiceName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceURL":"http://172.20.0.3:8065","StartLocal":"2024-11-07T15:37:19.181109825Z","StartUTC":"2024-11-07T15:37:19.181109825Z","TLSCipher":"TLS_AES_128_GCM_SHA256","TLSVersion":"1.3","entryPointName":"websecure","level":"info","msg":"","time":"2024-11-07T15:37:49Z"}
{"ClientAddr":"XXXXX:9809","ClientHost":"XXXXX","ClientPort":"9809","ClientUsername":"-","DownstreamContentSize":15,"DownstreamStatus":504,"Duration":30001269707,"OriginContentSize":15,"OriginDuration":30001013701,"OriginStatus":504,"Overhead":256006,"RequestAddr":"mattermost.MYDOMAIN","RequestContentSize":0,"RequestCount":4820,"RequestHost":"mattermost.MYDOMAIN","RequestMethod":"GET","RequestPath":"/api/v4/websocket?connection_id=ie8djs8zwbbnmq8m7t47j8oo9e\u0026sequence_number=11\u0026posted_ack=true","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"https","RetryAttempts":0,"RouterName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceAddr":"172.20.0.3:8065","ServiceName":"mattermost-mattermost-cbe7d3-9-websecure@docker","ServiceURL":"http://172.20.0.3:8065","StartLocal":"2024-11-07T15:37:19.181109825Z","StartUTC":"2024-11-07T15:37:19.181109825Z","TLSCipher":"TLS_AES_128_GCM_SHA256","TLSVersion":"1.3","entryPointName":"websecure","level":"info","msg":"","time":"2024-11-07T15:37:49Z"}
Siumauricio
Siumauricio6mo ago
yeah but if you want to use a domain you need to link all the services to dokploy-network
johannes0910
johannes0910OP6mo ago
OK, I switched the network to dokploy yesterday, and the machine has run through without interruption since. So while creating another network works in principle, it's too unreliable. Thanks for the help!

Did you find this page helpful?