Skip to content

Commit 7564435

Browse files
committed
implement logging to SAL via logmsgbot
1 parent 4c83310 commit 7564435

File tree

4 files changed

+60
-13
lines changed

4 files changed

+60
-13
lines changed

README.markdown

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ In a WMF multi-instance environment, iteratively execute nodetool on all local C
5555
#### Synopsis
5656
usage: c-foreach-restart [-h] [-a ATTEMPTS] [-r RETRY]
5757
[--execute-post-shutdown CMD] [-d DELAY]
58+
[--logmsgbot LOGMSGBOT] [--tcpircbot-host HOST]
59+
[--tcpircbot-port PORT] [--phabricator-issue ISSUE]
5860

5961
Cassandra instance restarter
6062

@@ -72,7 +74,22 @@ In a WMF multi-instance environment, iteratively execute nodetool on all local C
7274
-d DELAY, --delay DELAY
7375
Delay between instance restarts (defaults to no
7476
delay).
75-
77+
--logmsgbot LOGMSGBOT
78+
Log restarts to SAL (via logmsgbot and #wikimedia-
79+
operations).
80+
--tcpircbot-host HOST
81+
tcpircbot hostname. Only valid when --logmsgbot is
82+
used. Default: neon.wikimedia.org
83+
--tcpircbot-port PORT
84+
tcpircbot port number. Only valid when --logmsgbot is
85+
used. Default: 9200
86+
--phabricator-issue ISSUE
87+
Phabricator issue to associate these restarts with.
88+
This currently only makes sense in combination with
89+
--logmsgbot where it is included in the formatted log
90+
message.
91+
92+
7693
#### Description
7794
In a WMF multi-instance environment, iteratively restart instances.
7895

c-foreach-restart

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,28 @@ import logging
66
import sys
77
import time
88

9-
from cassandra.tools import get_instances
10-
from cassandra.tools.config import LOG_LEVEL
9+
from cassandra.tools import get_instances
10+
from cassandra.tools.config import LOG_LEVEL
11+
from cassandra.tools.tcpircbot import IrcBot
1112

1213
logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(levelname)-8s %(message)s")
1314

1415

15-
def main(attempts, retry, delay, post_shutdown):
16+
def logmsgbot_message(instance, issue=None):
17+
return "Restarting {0.service_name} {1}".format(instance, "({})".format(issue) if issue else "")
18+
19+
def main():
20+
args = parse_args()
21+
bot = IrcBot(args.tcpircbot_host, args.tcpircbot_port, 1.0)
1622
for instance in get_instances():
17-
instance.restart(attempts=attempts, retry=retry, post_shutdown=post_shutdown)
18-
time.sleep(delay)
23+
if args.logmsgbot:
24+
bot.log(logmsgbot_message(instance, args.phabricator_issue))
25+
instance.restart(
26+
attempts=args.attempts,
27+
retry=args.retry,
28+
post_shutdown=args.execute_post_shutdown
29+
)
30+
time.sleep(args.delay)
1931

2032
def parse_args():
2133
parser = argparse.ArgumentParser(description="Cassandra instance restarter")
@@ -26,11 +38,22 @@ def parse_args():
2638
parser.add_argument("--execute-post-shutdown", metavar="CMD", type=str,
2739
help="Command to execute after Cassandra has been shutdown, and before it "
2840
"is started back up.")
29-
parser.add_argument("-d", "--delay", metavar="DELAY", type=int,
41+
parser.add_argument("-d", "--delay", metavar="DELAY", type=float, default=0.0,
3042
help="Delay between instance restarts (defaults to no delay).")
43+
parser.add_argument("--logmsgbot", action="store_true",
44+
help="Log restarts to SAL (via logmsgbot and #wikimedia-operations).")
45+
parser.add_argument("--tcpircbot-host", default="neon.wikimedia.org", metavar="HOST",
46+
help="tcpircbot hostname. Only valid when --logmsgbot is used. "
47+
"Default: neon.wikimedia.org")
48+
parser.add_argument("--tcpircbot-port", metavar="PORT", type=int, default=9200,
49+
help="tcpircbot port number. Only valid when --logmsgbot is used. "
50+
"Default: 9200")
51+
parser.add_argument("--phabricator-issue", metavar="ISSUE",
52+
help="Phabricator issue to associate these restarts with. This currently "
53+
"only makes sense in combination with --logmsgbot where it is included in "
54+
"the formatted log message.")
3155
return parser.parse_args(sys.argv[1:])
3256

3357

3458
if __name__ == "__main__":
35-
args = parse_args()
36-
main(args.attempts, args.retry, args.delay, args.execute_post_shutdown)
59+
main()

cassandra/tools/instances.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from time import sleep
1313
import yaml
1414

15-
from .config import DESCRIPTOR_DIR
16-
from .nodetool import Nodetool
15+
from .config import DESCRIPTOR_DIR
16+
from .nodetool import Nodetool
1717

1818

1919
def __get_descriptor_files():

cassandra/tools/tcpircbot.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22

3+
import getpass
34
import logging
45
import socket
56

@@ -19,11 +20,17 @@ def log(self, msg, *args, **kwargs):
1920
sock.connect((self.host, self.port))
2021
sock.sendall(IrcBot.format_message(msg, *args, **kwargs))
2122
except (socket.timeout, socket.error, socket.gaierror), err:
22-
logging.error("Unable to send to logmsgbot (SAL): %s", err.message)
23+
logging.error(
24+
"Unable to send to logmsgbot (SAL): socket.error (%s) %s",
25+
err.errno,
26+
err.strerror
27+
)
2328
finally:
2429
if sock:
2530
sock.close()
2631

2732
@classmethod
2833
def format_message(cls, msg, *args, **kwargs):
29-
return "!log " + msg.format(*args, **kwargs)
34+
username = getpass.getuser()
35+
hostname = socket.gethostbyaddr(socket.gethostname())[0]
36+
return "!log {}@{}: ".format(username, hostname) + msg.format(*args, **kwargs)

0 commit comments

Comments
 (0)