Skip to content

Commit

Permalink
Release 20231218 (#1626)
Browse files Browse the repository at this point in the history
* feat(slow-query): show org and cluster info in clinic (#1621)

* profiling: Pass the TLS certificate to jeprof (#1624)

* support tls for tikv heap profile

Signed-off-by: Connor1996 <[email protected]>

* add log

Signed-off-by: Connor1996 <[email protected]>

* move log

Signed-off-by: Connor1996 <[email protected]>

* use quote

Signed-off-by: Connor1996 <[email protected]>

* add log

Signed-off-by: Connor1996 <[email protected]>

* change

Signed-off-by: Connor1996 <[email protected]>

* clean

Signed-off-by: Connor1996 <[email protected]>

* fix lint

Signed-off-by: Connor1996 <[email protected]>

---------

Signed-off-by: Connor1996 <[email protected]>

* profiling: use debug=1 for goroutine profile to avoid STW (#1614)

* use debug=2 for goroutines

* add comment

---------

Co-authored-by: Yexiang Zhang <[email protected]>
Co-authored-by: Sparkle <[email protected]>

* add ticdc topology in apiserver (#1622)

* support ticdc in apiserver

* address comment

* fix lint

---------

Co-authored-by: Yexiang Zhang <[email protected]>

* update release-version

Signed-off-by: mornyx <[email protected]>

---------

Signed-off-by: Connor1996 <[email protected]>
Signed-off-by: mornyx <[email protected]>
Co-authored-by: Sparkle <[email protected]>
Co-authored-by: Connor <[email protected]>
Co-authored-by: djshow832 <[email protected]>
Co-authored-by: CharlesCheung <[email protected]>
  • Loading branch information
5 people authored Dec 18, 2023
1 parent ce40978 commit aa621ed
Show file tree
Hide file tree
Showing 20 changed files with 320 additions and 39 deletions.
23 changes: 14 additions & 9 deletions cmd/tidb-dashboard/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,24 @@ func NewCLIConfig() *DashboardCLIConfig {

// setup TLS config for TiDB components
if len(*clusterCaPath) != 0 && len(*clusterCertPath) != 0 && len(*clusterKeyPath) != 0 {
cfg.CoreConfig.ClusterTLSConfig = buildTLSConfig(clusterCaPath, clusterKeyPath, clusterCertPath, clusterAllowedNames)
tlsInfo := &transport.TLSInfo{
TrustedCAFile: *clusterCaPath,
KeyFile: *clusterKeyPath,
CertFile: *clusterCertPath,
}
cfg.CoreConfig.ClusterTLSInfo = tlsInfo
cfg.CoreConfig.ClusterTLSConfig = buildTLSConfig(tlsInfo, clusterAllowedNames)
}

// setup TLS config for MySQL client
// See https://github.com/pingcap/docs/blob/7a62321b3ce9318cbda8697503c920b2a01aeb3d/how-to/secure/enable-tls-clients.md#enable-authentication
if (len(*tidbCertPath) != 0 && len(*tidbKeyPath) != 0) || len(*tidbCaPath) != 0 {
cfg.CoreConfig.TiDBTLSConfig = buildTLSConfig(tidbCaPath, tidbKeyPath, tidbCertPath, tidbAllowedNames)
tlsInfo := &transport.TLSInfo{
TrustedCAFile: *tidbCaPath,
KeyFile: *tidbKeyPath,
CertFile: *tidbCertPath,
}
cfg.CoreConfig.TiDBTLSConfig = buildTLSConfig(tlsInfo, tidbAllowedNames)
}

if err := cfg.CoreConfig.NormalizePDEndPoint(); err != nil {
Expand Down Expand Up @@ -140,13 +151,7 @@ func getContext() context.Context {
return ctx
}

func buildTLSConfig(caPath, keyPath, certPath, allowedNames *string) *tls.Config {
tlsInfo := transport.TLSInfo{
TrustedCAFile: *caPath,
KeyFile: *keyPath,
CertFile: *certPath,
}

func buildTLSConfig(tlsInfo *transport.TLSInfo, allowedNames *string) *tls.Config {
tlsConfig, err := tlsInfo.ClientConfig()
if err != nil {
log.Fatal("Failed to load certificates", zap.Error(err))
Expand Down
1 change: 1 addition & 0 deletions pkg/apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ func newClients(lc fx.Lifecycle, config *config.Config) (
kvClient = tikvclient.NewStatusClient(httpConfig)
csClient = tiflashclient.NewStatusClient(httpConfig)
pdClient = pdclient.NewAPIClient(httpConfig)
// cdcClient = ticdcclient.NewStatusClient(httpConfig)
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
dbClient.SetDefaultCtx(ctx)
Expand Down
8 changes: 8 additions & 0 deletions pkg/apiserver/clusterinfo/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ func (s *Service) fetchAllInstanceHosts() ([]string, error) {
allHostsMap[i.IP] = struct{}{}
}

ticdcIndo, err := topology.FetchTiCDCTopology(s.lifecycleCtx, s.params.EtcdClient)
if err != nil {
return nil, err
}
for _, i := range ticdcIndo {
allHostsMap[i.IP] = struct{}{}
}

allHosts := lo.Keys(allHostsMap)
sort.Strings(allHosts)

Expand Down
16 changes: 16 additions & 0 deletions pkg/apiserver/clusterinfo/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func RegisterRouter(r *gin.RouterGroup, auth *user.AuthService, s *Service) {
endpoint := r.Group("/topology")
endpoint.Use(auth.MWAuthRequired())
endpoint.GET("/tidb", s.getTiDBTopology)
endpoint.GET("/ticdc", s.getTiCDCTopology)
endpoint.DELETE("/tidb/:address", s.deleteTiDBTopology)
endpoint.GET("/store", s.getStoreTopology)
endpoint.GET("/pd", s.getPDTopology)
Expand Down Expand Up @@ -124,6 +125,21 @@ func (s *Service) getTiDBTopology(c *gin.Context) {
c.JSON(http.StatusOK, instances)
}

// getTiCDCTopology serves GET /topology/ticdc: it fetches the TiCDC topology
// using the service's etcd client and writes the result as a JSON response.
//
// @ID getTiCDCTopology
// @Summary Get all TiCDC instances
// @Success 200 {array} topology.TiCDCInfo
// @Router /topology/ticdc [get]
// @Security JwtAuth
// @Failure 401 {object} rest.ErrorResponse
func (s *Service) getTiCDCTopology(c *gin.Context) {
	cdcInstances, fetchErr := topology.FetchTiCDCTopology(s.lifecycleCtx, s.params.EtcdClient)
	if fetchErr != nil {
		// Hand the failure to the shared REST error helper and stop here.
		rest.Error(c, fetchErr)
		return
	}

	c.JSON(http.StatusOK, cdcInstances)
}

type StoreTopologyResponse struct {
TiKV []topology.StoreInfo `json:"tikv"`
TiFlash []topology.StoreInfo `json:"tiflash"`
Expand Down
18 changes: 18 additions & 0 deletions pkg/apiserver/clusterinfo/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func (s *Service) calculateStatistics(db *gorm.DB) (*ClusterStatistics, error) {
infoByIk["tidb"] = newInstanceKindImmediateInfo()
infoByIk["tikv"] = newInstanceKindImmediateInfo()
infoByIk["tiflash"] = newInstanceKindImmediateInfo()
infoByIk["ticdc"] = newInstanceKindImmediateInfo()

// Fill from topology info
pdInfo, err := topology.FetchPDTopology(s.params.PDClient)
Expand Down Expand Up @@ -113,6 +114,16 @@ func (s *Service) calculateStatistics(db *gorm.DB) (*ClusterStatistics, error) {
globalInfo.instances[net.JoinHostPort(i.IP, strconv.Itoa(int(i.Port)))] = struct{}{}
infoByIk["tidb"].instances[net.JoinHostPort(i.IP, strconv.Itoa(int(i.Port)))] = struct{}{}
}
ticdcInfo, err := topology.FetchTiCDCTopology(s.lifecycleCtx, s.params.EtcdClient)
if err != nil {
return nil, err
}
for _, i := range ticdcInfo {
globalHostsSet[i.IP] = struct{}{}
globalVersionsSet[i.Version] = struct{}{}
globalInfo.instances[net.JoinHostPort(i.IP, strconv.Itoa(int(i.Port)))] = struct{}{}
infoByIk["ticdc"].instances[net.JoinHostPort(i.IP, strconv.Itoa(int(i.Port)))] = struct{}{}
}

// Fill from hardware info
allHostsInfoMap := make(map[string]*hostinfo.Info)
Expand Down Expand Up @@ -162,6 +173,13 @@ func (s *Service) calculateStatistics(db *gorm.DB) (*ClusterStatistics, error) {
globalFailureHostsSet[i.IP] = struct{}{}
}
}
for _, i := range ticdcInfo {
if v, ok := globalInfo.hosts[i.IP]; ok {
infoByIk["ticdc"].hosts[i.IP] = v
} else {
globalFailureHostsSet[i.IP] = struct{}{}
}
}

// Generate result..
versions := lo.Keys(globalVersionsSet)
Expand Down
15 changes: 15 additions & 0 deletions pkg/apiserver/debugapi/endpoint/payload.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,21 @@ func (p *ResolvedRequestPayload) verifyEndpoint(ctx context.Context, etcdClient
if !matched {
return ErrInvalidEndpoint.New("invalid endpoint '%s:%d'", p.host, p.port)
}
case topo.KindTiCDC:
infos, err := topology.FetchTiCDCTopology(ctx, etcdClient)
if err != nil {
return ErrInvalidEndpoint.Wrap(err, "failed to fetch ticdc topology")
}
matched := false
for _, info := range infos {
if info.IP == p.host && info.Port == uint(p.port) {
matched = true
break
}
}
if !matched {
return ErrInvalidEndpoint.New("invalid endpoint '%s:%d'", p.host, p.port)
}
default:
return ErrUnknownComponent.New("Unknown component '%s'", p.api.Component)
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/apiserver/model/common_models.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const (
NodeKindTiKV NodeKind = "tikv"
NodeKindPD NodeKind = "pd"
NodeKindTiFlash NodeKind = "tiflash"
NodeKindTiCDC NodeKind = "ticdc"
)

type RequestTargetNode struct {
Expand All @@ -37,6 +38,7 @@ type RequestTargetStatistics struct {
NumTiDBNodes int `json:"num_tidb_nodes"`
NumPDNodes int `json:"num_pd_nodes"`
NumTiFlashNodes int `json:"num_tiflash_nodes"`
NumTiCDCNodes int `json:"num_ticdc_nodes"`
}

func NewRequestTargetStatisticsFromArray(arr *[]RequestTargetNode) RequestTargetStatistics {
Expand All @@ -51,6 +53,8 @@ func NewRequestTargetStatisticsFromArray(arr *[]RequestTargetNode) RequestTarget
stats.NumPDNodes++
case NodeKindTiFlash:
stats.NumTiFlashNodes++
case NodeKindTiCDC:
stats.NumTiCDCNodes++
}
}
return stats
Expand Down
12 changes: 11 additions & 1 deletion pkg/apiserver/profiling/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net"
"os"
"os/exec"
"strconv"
"strings"
Expand Down Expand Up @@ -75,8 +76,17 @@ var jeprof string

func (f *tikvFetcher) fetch(op *fetchOptions) ([]byte, error) {
if strings.HasSuffix(op.path, "heap") {
cmd := exec.Command("perl", "/dev/stdin", "--raw", "http://"+op.ip+":"+strconv.Itoa(op.port)+op.path) //nolint:gosec
scheme := f.client.GetHTTPScheme()
cmd := exec.Command("perl", "/dev/stdin", "--raw", scheme+"://"+op.ip+":"+strconv.Itoa(op.port)+op.path) //nolint:gosec
cmd.Stdin = strings.NewReader(jeprof)
if f.client.GetTLSInfo() != nil {
cmd.Env = append(os.Environ(), fmt.Sprintf(
"URL_FETCHER=curl -s --cert %s --key %s --cacert %s",
f.client.GetTLSInfo().CertFile,
f.client.GetTLSInfo().KeyFile,
f.client.GetTLSInfo().TrustedCAFile,
))
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
Expand Down
40 changes: 18 additions & 22 deletions pkg/apiserver/profiling/jeprof.in
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ Miscellaneous:
Environment Variables:
JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof
JEPROF_TOOLS Prefix for object tools pathnames
URL_FETCHER Command to fetch remote profiles
Examples:
Expand Down Expand Up @@ -521,6 +522,11 @@ sub Init() {
$main::prog = "";
@main::pfile_args = ();

# Override url_fetcher variable if URL_FETCHER environment variable is set
if ($ENV{URL_FETCHER}) {
@URL_FETCHER = split(' ', $ENV{URL_FETCHER});
}

# Remote profiling without a binary (using $SYMBOL_PAGE instead)
if (@ARGV > 0) {
if (IsProfileURL($ARGV[0])) {
Expand Down Expand Up @@ -688,15 +694,15 @@ sub Main() {
my $symbol_map = {};

# Read one profile, pick the first item on the list
my $data = ReadProfile($main::prog, pop(@main::profile_files));
my $data = ReadProfile($main::prog, $main::profile_files[0]);
my $profile = $data->{profile};
my $pcs = $data->{pcs};
my $libs = $data->{libs}; # Info about main program and shared libraries
$symbol_map = MergeSymbols($symbol_map, $data->{symbols});

# Add additional profiles, if available.
if (scalar(@main::profile_files) > 0) {
foreach my $pname (@main::profile_files) {
if (scalar(@main::profile_files) > 1) {
foreach my $pname (@main::profile_files[1..$#main::profile_files]) {
my $data2 = ReadProfile($main::prog, $pname);
$profile = AddProfile($profile, $data2->{profile});
$pcs = AddPcs($pcs, $data2->{pcs});
Expand Down Expand Up @@ -3359,21 +3365,18 @@ sub ParseProfileURL {
my $prefix = $3;
my $profile = $4 || "/";

my $host = $hostport;
$host =~ s/:.*//;

my $baseurl = "$proto$hostport$prefix";
return ($host, $baseurl, $profile);
return ($hostport, $baseurl, $profile);
}

# We fetch symbols from the first profile argument.
sub SymbolPageURL {
my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
my ($hostport, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
return "$baseURL$SYMBOL_PAGE";
}

sub FetchProgramName() {
my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
my ($hostport, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
my $url = "$baseURL$PROGRAM_NAME_PAGE";
my $command_line = ShellEscape(@URL_FETCHER, $url);
open(CMDLINE, "$command_line |") or error($command_line);
Expand Down Expand Up @@ -3550,10 +3553,10 @@ sub BaseName {

sub MakeProfileBaseName {
my ($binary_name, $profile_name) = @_;
my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
my ($hostport, $baseURL, $path) = ParseProfileURL($profile_name);
my $binary_shortname = BaseName($binary_name);
return sprintf("%s.%s.%s",
$binary_shortname, $main::op_time, $host);
$binary_shortname, $main::op_time, $hostport);
}

sub FetchDynamicProfile {
Expand All @@ -3565,7 +3568,7 @@ sub FetchDynamicProfile {
if (!IsProfileURL($profile_name)) {
return $profile_name;
} else {
my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
my ($hostport, $baseURL, $path) = ParseProfileURL($profile_name);
if ($path eq "" || $path eq "/") {
# Missing type specifier defaults to cpu-profile
$path = $PROFILE_PAGE;
Expand Down Expand Up @@ -5321,19 +5324,12 @@ sub cleanup {
unlink($main::tmpfile_sym);
unlink(keys %main::tempnames);

# We leave any collected profiles in $HOME/jeprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
defined($main::collected_profile)) {
if (scalar(@main::profile_files) == 1) {
print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
my @profiles = split(" \\\n ", $main::collected_profile);
foreach my $profile (@profiles) {
unlink($profile);
}
print STDERR "If you want to investigate this profile further, you can do:\n";
print STDERR "\n";
print STDERR " jeprof \\\n";
print STDERR " $main::prog \\\n";
print STDERR " $main::collected_profile\n";
print STDERR "\n";
}
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/apiserver/profiling/pprof.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ func (f *fetcher) FetchAndWriteToFile(duration uint, fileNameWithoutExt string,
fileExtenstion = "*.proto"
}
case ProfilingTypeGoroutine:
url = "/debug/pprof/goroutine?debug=2"
// debug=2 causes STW when collecting the stacks. See https://github.com/pingcap/tidb/issues/48695.
url = "/debug/pprof/goroutine?debug=1"
profilingRawDataType = RawDataTypeText
fileExtenstion = "*.txt"
case ProfilingTypeMutex:
Expand Down
8 changes: 6 additions & 2 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"net/url"
"strings"

"go.etcd.io/etcd/pkg/transport"

"github.com/pingcap/tidb-dashboard/pkg/utils/version"
)

Expand All @@ -24,8 +26,9 @@ type Config struct {
PDEndPoint string
PublicPathPrefix string

ClusterTLSConfig *tls.Config // TLS config for mTLS authentication between TiDB components.
TiDBTLSConfig *tls.Config // TLS config for mTLS authentication between TiDB and MySQL client.
ClusterTLSConfig *tls.Config // TLS config for mTLS authentication between TiDB components.
ClusterTLSInfo *transport.TLSInfo // TLS info for mTLS authentication between TiDB components.
TiDBTLSConfig *tls.Config // TLS config for mTLS authentication between TiDB and MySQL client.

EnableTelemetry bool
EnableExperimental bool
Expand All @@ -41,6 +44,7 @@ func Default() *Config {
PDEndPoint: "http://127.0.0.1:2379",
PublicPathPrefix: defaultPublicPathPrefix,
ClusterTLSConfig: nil,
ClusterTLSInfo: nil,
TiDBTLSConfig: nil,
EnableTelemetry: false,
EnableExperimental: false,
Expand Down
11 changes: 11 additions & 0 deletions pkg/tikv/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strconv"
"time"

"go.etcd.io/etcd/pkg/transport"
"go.uber.org/fx"

"github.com/pingcap/tidb-dashboard/pkg/config"
Expand All @@ -29,6 +30,7 @@ type Client struct {
httpScheme string
lifecycleCtx context.Context
timeout time.Duration
tlsInfo *transport.TLSInfo
}

func NewTiKVClient(lc fx.Lifecycle, httpClient *httpc.Client, config *config.Config) *Client {
Expand All @@ -37,6 +39,7 @@ func NewTiKVClient(lc fx.Lifecycle, httpClient *httpc.Client, config *config.Con
httpScheme: config.GetClusterHTTPScheme(),
lifecycleCtx: nil,
timeout: defaultTiKVStatusAPITimeout,
tlsInfo: config.ClusterTLSInfo,
}

lc.Append(fx.Hook{
Expand All @@ -59,6 +62,14 @@ func (c Client) AddRequestHeader(key, value string) *Client {
return &c
}

// GetHTTPScheme returns the HTTP scheme stored on the client, i.e. the same
// value the client uses when it builds request URIs.
func (c *Client) GetHTTPScheme() string {
return c.httpScheme
}

// GetTLSInfo returns the TLS info stored on the client. NewTiKVClient copies
// it from config.ClusterTLSInfo, so the result is nil when that field is nil.
func (c *Client) GetTLSInfo() *transport.TLSInfo {
return c.tlsInfo
}

func (c *Client) Get(host string, statusPort int, relativeURI string) (*httpc.Response, error) {
uri := fmt.Sprintf("%s://%s%s", c.httpScheme, net.JoinHostPort(host, strconv.Itoa(statusPort)), relativeURI)
return c.httpClient.WithTimeout(c.timeout).Send(c.lifecycleCtx, uri, http.MethodGet, nil, ErrTiKVClientRequestFailed, distro.R().TiKV)
Expand Down
Loading

0 comments on commit aa621ed

Please sign in to comment.