In my OAPServer deployment, I chose ES7 for storage and Kafka as the data channel, and adapted dynamic configuration to our in-house configuration center (not covered in this article). I will keep revising and completing these notes as my understanding of the backend features and code deepens.
Selected key configurations
key | value | remark |
---|---|---|
cluster.zookeeper.hostPort | | ZooKeeper cluster address |
core.default.recordDataTTL | 3 | unit: days |
core.default.metricsDataTTL | 7 | unit: days |
core.default.topNReportPeriod | 10 | unit: minutes |
core.default.searchableTracesTags | http.method,status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker | tag keys not configured here cannot be searched |
core.default.syncThreads | 2 | number of threads used to synchronously refresh metrics data to the storage |
core.default.maxSyncOperationNum | 50000 | max entries per synchronous storage operation |
core.default.role | Receiver, Aggregator | node role (Mixed / Receiver / Aggregator) |
storage.elasticsearch7.clusterNodes | | ES cluster address |
storage.elasticsearch7.dayStep | 1 | number of days covered by one index |
storage.elasticsearch7.indexShardsNumber | 1 | shard number of new ordinary indexes |
storage.elasticsearch7.superDatasetIndexShardsFactor | 6 | super dataset shards number = indexShardsNumber * superDatasetIndexShardsFactor |
storage.elasticsearch7.indexReplicasNumber | 1 | replica number of new ordinary indexes |
storage.elasticsearch7.superDatasetIndexReplicasNumber | 0 | replica number of super dataset indexes |
storage.elasticsearch7.bulkActions | 1000 | upper limit of records per bulk operation |
storage.elasticsearch7.flushInterval | 10 | max interval (seconds) between bulk flushes |
storage.elasticsearch7.concurrentRequests | 2 | number of concurrent bulk requests |
kafka-fetcher.default.bootstrapServers | | Kafka cluster address |
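
With the values above, an ordinary index gets 1 shard, while a super dataset index (e.g. trace segments) gets indexShardsNumber * superDatasetIndexShardsFactor = 1 * 6 = 6 shards. The same choice expressed as a minimal storage snippet (the env names match the full configuration below):

storage:
  elasticsearch7:
    indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1} # ordinary indexes: 1 shard each
    superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:6} # super datasets: 1 * 6 = 6 shards each
    indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1} # 1 replica for ordinary indexes
    superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0} # no replicas for the huge record indexes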
Full core configuration
core:
  selector: ${SW_CORE:default}
  default:
    # Mixed: Receive agent data, Level 1 aggregate, Level 2 aggregate
    # Receiver: Receive agent data, Level 1 aggregate
    # Aggregator: Level 2 aggregate
    role: ${SW_CORE_ROLE:Mixed} # Mixed/Receiver/Aggregator
    restHost: ${SW_CORE_REST_HOST:0.0.0.0}
    restPort: ${SW_CORE_REST_PORT:12800}
    restContextPath: ${SW_CORE_REST_CONTEXT_PATH:/}
    restMinThreads: ${SW_CORE_REST_JETTY_MIN_THREADS:1}
    restMaxThreads: ${SW_CORE_REST_JETTY_MAX_THREADS:200}
    restIdleTimeOut: ${SW_CORE_REST_JETTY_IDLE_TIMEOUT:30000}
    restAcceptorPriorityDelta: ${SW_CORE_REST_JETTY_DELTA:0}
    restAcceptQueueSize: ${SW_CORE_REST_JETTY_QUEUE_SIZE:0}
    gRPCHost: ${SW_CORE_GRPC_HOST:0.0.0.0}
    gRPCPort: ${SW_CORE_GRPC_PORT:11800}
    maxConcurrentCallsPerConnection: ${SW_CORE_GRPC_MAX_CONCURRENT_CALL:0}
    maxMessageSize: ${SW_CORE_GRPC_MAX_MESSAGE_SIZE:0}
    gRPCThreadPoolQueueSize: ${SW_CORE_GRPC_POOL_QUEUE_SIZE:-1}
    gRPCThreadPoolSize: ${SW_CORE_GRPC_THREAD_POOL_SIZE:-1}
    gRPCSslEnabled: ${SW_CORE_GRPC_SSL_ENABLED:false}
    gRPCSslKeyPath: ${SW_CORE_GRPC_SSL_KEY_PATH:""}
    gRPCSslCertChainPath: ${SW_CORE_GRPC_SSL_CERT_CHAIN_PATH:""}
    gRPCSslTrustedCAPath: ${SW_CORE_GRPC_SSL_TRUSTED_CA_PATH:""}
    downsampling:
      - Hour
      - Day
    # Set a timeout on metrics data. After the timeout has expired, the metrics data will automatically be deleted.
    enableDataKeeperExecutor: ${SW_CORE_ENABLE_DATA_KEEPER_EXECUTOR:true} # If turned off, expired metrics data will no longer be deleted automatically.
    dataKeeperExecutePeriod: ${SW_CORE_DATA_KEEPER_EXECUTE_PERIOD:5} # How often the data keeper executor runs periodically, unit is minute
    recordDataTTL: ${SW_CORE_RECORD_DATA_TTL:3} # Unit is day
    metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day
    # Cache metrics data for 1 minute to reduce database queries, and if the OAP cluster changes within that minute,
    # the metrics may not be accurate within that minute.
    enableDatabaseSession: ${SW_CORE_ENABLE_DATABASE_SESSION:true}
    topNReportPeriod: ${SW_CORE_TOPN_REPORT_PERIOD:10} # top_n record worker report cycle, unit is minute
    # Extra model columns are columns defined in the code that are not logically required for aggregation or further queries,
    # and activating them adds load to OAP memory, network, and storage.
    # However, once activated, users can see these names in the storage entities, which makes it easier to query the data
    # directly with third-party tools, such as Kibana on ES.
    activeExtraModelColumns: ${SW_CORE_ACTIVE_EXTRA_MODEL_COLUMNS:false}
    # The max length of service + instance names should be less than 200
    serviceNameMaxLength: ${SW_SERVICE_NAME_MAX_LENGTH:70}
    instanceNameMaxLength: ${SW_INSTANCE_NAME_MAX_LENGTH:70}
    # The max length of service + endpoint names should be less than 240
    endpointNameMaxLength: ${SW_ENDPOINT_NAME_MAX_LENGTH:150}
    # Define the set of span tag keys, which should be searchable through the GraphQL.
    searchableTracesTags: ${SW_SEARCHABLE_TAG_KEYS:http.method,status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker}
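    # Tag keys not listed above cannot be searched; to make a custom tag searchable,
    # append its key to SW_SEARCHABLE_TAG_KEYS (my.custom.tag below is a hypothetical example):
    #   SW_SEARCHABLE_TAG_KEYS=http.method,status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker,my.custom.tag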
    # Define the set of log tag keys, which should be searchable through the GraphQL.
    searchableLogsTags: ${SW_SEARCHABLE_LOGS_TAG_KEYS:level}
    # The number of threads used to synchronously refresh the metrics data to the storage.
    syncThreads: ${SW_CORE_SYNC_THREADS:2}
    # The maximum number of entries supported for each synchronous storage operation. When the number of flushed data entries is greater than this value, the work is split across multiple cores for execution.
    maxSyncOperationNum: ${SW_CORE_MAX_SYNC_OPERATION_NUM:50000}
    apdexThreshold: ${SW_APDEX_THRESHOLD:500}
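
Every entry above uses the ${ENV_NAME:default} placeholder, so any value can be overridden with an environment variable instead of editing application.yml. A minimal docker-compose sketch (the service layout and image tag are illustrative, not my actual deployment) that runs one node as a pure Receiver:

services:
  oap:
    image: apache/skywalking-oap-server:8.6.0-es7 # example tag, pick your own version
    environment:
      SW_CORE_ROLE: Receiver        # receive agent data, Level 1 aggregate only
      SW_CORE_RECORD_DATA_TTL: "3"  # keep trace records for 3 days
      SW_CORE_METRICS_DATA_TTL: "7" # keep metrics for 7 days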
Full elasticsearch7 configuration
storage:
  selector: ${SW_STORAGE:elasticsearch7} # set the active storage to elasticsearch7
  elasticsearch7:
    nameSpace: ${SW_NAMESPACE:""}
    clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:} # ES cluster address
    protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"} # HTTP protocol
    trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
    trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
    dayStep: ${SW_STORAGE_DAY_STEP:1} # Represent the number of days in the one minute/hour/day index.
    indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:6} # Shard number of new indexes
    indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1} # Replicas number of new indexes
    # Super datasets are defined in the code, such as trace segments. The following 3 configs improve ES performance when storing super-size data in ES.
    superDatasetDayStep: ${SW_SUPERDATASET_STORAGE_DAY_STEP:-1} # Represent the number of days in the super size dataset record index, the default value is the same as dayStep when the value is less than 0
    superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:5} # This factor provides more shards for the super data set, shards number = indexShardsNumber * superDatasetIndexShardsFactor. Also, this factor affects Zipkin and Jaeger traces.
    superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0} # Represent the replicas number in the super size dataset record index, the default value is 0.
    user: ${SW_ES_USER:""}
    password: ${SW_ES_PASSWORD:""}
    secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""} # Secrets management file in the properties format includes the username, password, which are managed by 3rd party tool.
    bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:1000} # Execute the async bulk record data every ${SW_STORAGE_ES_BULK_ACTIONS} requests
    flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:10} # flush the bulk every 10 seconds whatever the number of requests
    concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:2} # the number of concurrent requests
    resultWindowMaxSize: ${SW_STORAGE_ES_QUERY_MAX_WINDOW_SIZE:10000}
    metadataQueryMaxSize: ${SW_STORAGE_ES_QUERY_MAX_SIZE:5000}
    segmentQueryMaxSize: ${SW_STORAGE_ES_QUERY_SEGMENT_SIZE:200}
    profileTaskQueryMaxSize: ${SW_STORAGE_ES_QUERY_PROFILE_TASK_SIZE:200}
    oapAnalyzer: ${SW_STORAGE_ES_OAP_ANALYZER:"{\"analyzer\":{\"oap_analyzer\":{\"type\":\"stop\"}}}"} # the oap analyzer.
    oapLogAnalyzer: ${SW_STORAGE_ES_OAP_LOG_ANALYZER:"{\"analyzer\":{\"oap_log_analyzer\":{\"type\":\"standard\"}}}"} # the oap log analyzer. It could be customized by the ES analyzer configuration to support more language log formats, such as Chinese logs, Japanese logs, etc.
    advanced: ${SW_STORAGE_ES_ADVANCED:""}
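
The three bulk settings work together: a bulk flush is triggered after bulkActions (1000) buffered records or after flushInterval (10) seconds, whichever comes first, with at most concurrentRequests (2) bulk requests in flight. A hedged tuning sketch for a heavier write load (the numbers are illustrative, not a recommendation):

storage:
  selector: ${SW_STORAGE:elasticsearch7}
  elasticsearch7:
    bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:5000} # flush every 5000 records...
    flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:5} # ...or every 5 seconds, whichever comes first
    concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:4} # allow 4 in-flight bulk requests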
Full kafka-fetcher configuration
kafka-fetcher:
  selector: ${SW_KAFKA_FETCHER:default}
  default:
    bootstrapServers: ${SW_KAFKA_FETCHER_SERVERS:xxxx} # Kafka cluster address
    partitions: ${SW_KAFKA_FETCHER_PARTITIONS:50} # number of partitions
    replicationFactor: ${SW_KAFKA_FETCHER_PARTITIONS_FACTOR:2} # replication factor
    enableMeterSystem: ${SW_KAFKA_FETCHER_ENABLE_METER_SYSTEM:false}
    enableLog: ${SW_KAFKA_FETCHER_ENABLE_LOG:false}
    isSharding: ${SW_KAFKA_FETCHER_IS_SHARDING:false}
    consumePartitions: ${SW_KAFKA_FETCHER_CONSUME_PARTITIONS:""}
    kafkaHandlerThreadPoolSize: ${SW_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
    kafkaHandlerThreadPoolQueueSize: ${SW_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}
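
When multiple OAP nodes consume the same Kafka topics, isSharding can be set to true so that each node takes a fixed slice of the partitions via consumePartitions. A minimal sketch for one node of an assumed two-node cluster (the partition split is illustrative):

kafka-fetcher:
  selector: ${SW_KAFKA_FETCHER:default}
  default:
    bootstrapServers: ${SW_KAFKA_FETCHER_SERVERS:xxxx}
    isSharding: ${SW_KAFKA_FETCHER_IS_SHARDING:true} # partitions are divided among OAP nodes
    consumePartitions: ${SW_KAFKA_FETCHER_CONSUME_PARTITIONS:0,2,4} # this node's partitions; the second node would take 1,3,5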