K8s Log Collection in Practice
Flow diagram
+-------------------+     +---------------------+      +------------------+  rotation script  +--------+
| Fluentd collector | --> | FluentBit forwarder | -+-> | Fluentd log sink | ----------------> | AWS S3 |
+-------------------+     +---------------------+  |   +------------------+                   +--------+
                                                   |
                                                   | http
                                                   v
                                     +----------------------------+
                                     | logExporter (log analysis) |
                                     +----------------------------+
Log collection
Log collection is handled by Fluentd running inside the k8s cluster, deployed as a DaemonSet. Fluentd tails the per-container log files under /var/log/containers and forwards their contents downstream.
We did not use FluentBit for collection because Fluentd has a ready-made configuration we could use directly, which saved us from working out (and debugging) a new configuration from scratch. If a ready-made FluentBit configuration for collecting Kubernetes cluster logs becomes available later, it can simply be deployed as a drop-in replacement for Fluentd.
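With the default json parser, each line Fluentd tails from /var/log/containers is a Docker json-file record. As a purely illustrative example (the payload below is made up, not taken from a real cluster), a tailed line looks roughly like this:

{"log":"[Error ] something went wrong\n","stream":"stderr","time":"2020-11-07T10:59:53.399975037Z"}

Fluentd parses the JSON, and the kubernetes_metadata filter in the configuration below then enriches each record with pod, namespace, and label information.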
Reference configuration:
# YAML source:
# https://github.com/fluent/fluentd-kubernetes-daemonset/blob/master/fluentd-daemonset-forward.yaml
# Image source and version reference:
# https://hub.docker.com/r/fluent/fluentd-kubernetes-daemonset/
# Local modifications:
# 1. Renamed the file and labelled it v1
# 2. Set the required environment variables
# 3. Pinned the image version to v1.11.5-debian-forward-1.0
# 4. Overrode the config files to shorten the log collection interval; changes are marked with "# modification start"
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-daemonset-fluentconf
  namespace: kube-system
data:
  fluent.conf: |
    # AUTOMATICALLY GENERATED
    # DO NOT EDIT THIS FILE DIRECTLY, USE /templates/conf/fluent.conf.erb

    @include "#{ENV['FLUENTD_SYSTEMD_CONF'] || 'systemd'}.conf"
    @include "#{ENV['FLUENTD_PROMETHEUS_CONF'] || 'prometheus'}.conf"
    @include kubernetes.conf
    @include conf.d/*.conf

    <match **>
      @type forward
      @id out_fwd
      @log_level info
      <server>
        host "#{ENV['FLUENT_FOWARD_HOST']}"
        port "#{ENV['FLUENT_FOWARD_PORT']}"
      </server>
      # modification start
      <buffer>
        @type memory
        flush_at_shutdown true
        flush_mode interval
        flush_interval 1s
        flush_thread_count 4
        flush_thread_interval 0.1
        flush_thread_burst_interval 0.1
      </buffer>
      # modification end
    </match>
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-daemonset-kubernetesconf
  namespace: kube-system
data:
  kubernetes.conf: |
    # AUTOMATICALLY GENERATED
    # DO NOT EDIT THIS FILE DIRECTLY, USE /templates/conf/kubernetes.conf.erb

    <label @FLUENT_LOG>
      <match fluent.**>
        @type null
        @id ignore_fluent_logs
      </match>
    </label>

    <source>
      @type tail
      @id in_tail_container_logs
      path /var/log/containers/*.log
      pos_file /var/log/fluentd-containers.log.pos
      tag "#{ENV['FLUENT_CONTAINER_TAIL_TAG'] || 'kubernetes.*'}"
      exclude_path "#{ENV['FLUENT_CONTAINER_TAIL_EXCLUDE_PATH'] || use_default}"
      read_from_head true
      # modification start
      refresh_interval 10
      multiline_flush_interval 5
      # modification end
      <parse>
        @type "#{ENV['FLUENT_CONTAINER_TAIL_PARSER_TYPE'] || 'json'}"
        time_format %Y-%m-%dT%H:%M:%S.%NZ
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_minion
      path /var/log/salt/minion
      pos_file /var/log/fluentd-salt.pos
      tag salt
      <parse>
        @type regexp
        expression /^(?<time>[^ ]* [^ ,]*)[^\[]*\[[^\]]*\]\[(?<severity>[^ \]]*) *\] (?<message>.*)$/
        time_format %Y-%m-%d %H:%M:%S
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_startupscript
      path /var/log/startupscript.log
      pos_file /var/log/fluentd-startupscript.log.pos
      tag startupscript
      <parse>
        @type syslog
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_docker
      path /var/log/docker.log
      pos_file /var/log/fluentd-docker.log.pos
      tag docker
      <parse>
        @type regexp
        expression /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=($<status_code>\d+))?/
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_etcd
      path /var/log/etcd.log
      pos_file /var/log/fluentd-etcd.log.pos
      tag etcd
      <parse>
        @type none
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_kubelet
      multiline_flush_interval 5s
      path /var/log/kubelet.log
      pos_file /var/log/fluentd-kubelet.log.pos
      tag kubelet
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_kube_proxy
      multiline_flush_interval 5s
      path /var/log/kube-proxy.log
      pos_file /var/log/fluentd-kube-proxy.log.pos
      tag kube-proxy
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_kube_apiserver
      multiline_flush_interval 5s
      path /var/log/kube-apiserver.log
      pos_file /var/log/fluentd-kube-apiserver.log.pos
      tag kube-apiserver
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_kube_controller_manager
      multiline_flush_interval 5s
      path /var/log/kube-controller-manager.log
      pos_file /var/log/fluentd-kube-controller-manager.log.pos
      tag kube-controller-manager
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_kube_scheduler
      multiline_flush_interval 5s
      path /var/log/kube-scheduler.log
      pos_file /var/log/fluentd-kube-scheduler.log.pos
      tag kube-scheduler
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_rescheduler
      multiline_flush_interval 5s
      path /var/log/rescheduler.log
      pos_file /var/log/fluentd-rescheduler.log.pos
      tag rescheduler
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_glbc
      multiline_flush_interval 5s
      path /var/log/glbc.log
      pos_file /var/log/fluentd-glbc.log.pos
      tag glbc
      <parse>
        @type kubernetes
      </parse>
    </source>

    <source>
      @type tail
      @id in_tail_cluster_autoscaler
      multiline_flush_interval 5s
      path /var/log/cluster-autoscaler.log
      pos_file /var/log/fluentd-cluster-autoscaler.log.pos
      tag cluster-autoscaler
      <parse>
        @type kubernetes
      </parse>
    </source>

    # Example:
    # 2017-02-09T00:15:57.992775796Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" ip="104.132.1.72" method="GET" user="kubecfg" as="<self>" asgroups="<lookup>" namespace="default" uri="/api/v1/namespaces/default/pods"
    # 2017-02-09T00:15:57.993528822Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" response="200"
    <source>
      @type tail
      @id in_tail_kube_apiserver_audit
      multiline_flush_interval 5s
      path /var/log/kubernetes/kube-apiserver-audit.log
      pos_file /var/log/kube-apiserver-audit.log.pos
      tag kube-apiserver-audit
      <parse>
        @type multiline
        format_firstline /^\S+\s+AUDIT:/
        # Fields must be explicitly captured by name to be parsed into the record.
        # Fields may not always be present, and order may change, so this just looks
        # for a list of key="\"quoted\" value" pairs separated by spaces.
        # Unknown fields are ignored.
        # Note: We can't separate query/response lines as format1/format2 because
        # they don't always come one after the other for a given query.
        format1 /^(?<time>\S+) AUDIT:(?: (?:id="(?<id>(?:[^"\\]|\\.)*)"|ip="(?<ip>(?:[^"\\]|\\.)*)"|method="(?<method>(?:[^"\\]|\\.)*)"|user="(?<user>(?:[^"\\]|\\.)*)"|groups="(?<groups>(?:[^"\\]|\\.)*)"|as="(?<as>(?:[^"\\]|\\.)*)"|asgroups="(?<asgroups>(?:[^"\\]|\\.)*)"|namespace="(?<namespace>(?:[^"\\]|\\.)*)"|uri="(?<uri>(?:[^"\\]|\\.)*)"|response="(?<response>(?:[^"\\]|\\.)*)"|\w+="(?:[^"\\]|\\.)*"))*/
        time_format %Y-%m-%dT%T.%L%Z
      </parse>
    </source>

    <filter kubernetes.**>
      @type kubernetes_metadata
      @id filter_kube_metadata
      kubernetes_url "#{ENV['FLUENT_FILTER_KUBERNETES_URL'] || 'https://' + ENV.fetch('KUBERNETES_SERVICE_HOST') + ':' + ENV.fetch('KUBERNETES_SERVICE_PORT') + '/api'}"
      verify_ssl "#{ENV['KUBERNETES_VERIFY_SSL'] || true}"
      ca_file "#{ENV['KUBERNETES_CA_FILE']}"
      skip_labels "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_LABELS'] || 'false'}"
      skip_container_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_CONTAINER_METADATA'] || 'false'}"
      skip_master_url "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_MASTER_URL'] || 'false'}"
      skip_namespace_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_NAMESPACE_METADATA'] || 'false'}"
    </filter>
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluentd
  namespace: kube-system
  labels:
    k8s-app: fluentd-logging
    version: v1
    servicename: fluentdcollector
    clustername: pf-beta-b1k
spec:
  selector:
    matchLabels:
      k8s-app: fluentd-logging
      version: v1
      servicename: fluentdcollector
      clustername: pf-beta-b1k
  template:
    metadata:
      labels:
        k8s-app: fluentd-logging
        version: v1
        servicename: fluentdcollector
        clustername: pf-beta-b1k
    spec:
      tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
      containers:
      - name: fluentd
        image: fluent/fluentd-kubernetes-daemonset:v1.11.5-debian-forward-1.0
        env:
        - name: FLUENT_FOWARD_HOST
          value: "FLUENT_FOWARD_HOST"
        - name: FLUENT_FOWARD_PORT
          value: "FLUENT_FOWARD_PORT"
        resources:
          limits:
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 200Mi
        volumeMounts:
        - name: varlog
          mountPath: /var/log
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
        - name: fluentdetckubernetesconf
          mountPath: /fluentd/etc/kubernetes.conf
          subPath: kubernetes.conf
        - name: fluentdetcfluentconf
          mountPath: /fluentd/etc/fluent.conf
          subPath: fluent.conf
      terminationGracePeriodSeconds: 30
      volumes:
      - name: varlog
        hostPath:
          path: /var/log
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      - name: fluentdetckubernetesconf
        configMap:
          name: fluentd-daemonset-kubernetesconf
      - name: fluentdetcfluentconf
        configMap:
          name: fluentd-daemonset-fluentconf
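Deploying the collector is the usual kubectl flow; the manifest filename below is a placeholder for wherever the YAML above is saved:

kubectl apply -f fluentd-daemonset.yaml
kubectl -n kube-system get pods -l k8s-app=fluentd-logging -o wide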
Log forwarding
All collected logs are delivered to a single forwarding tier. Forwarding is handled by FluentBit, which offers higher performance at a lower resource cost.
The forwarder is only responsible for forwarding and multi-destination fan-out; it deliberately carries as little log-processing logic as possible.
If forwarding ever becomes a performance bottleneck, the forwarder can be scaled out to multiple replicas behind a Layer 4 load balancer (a minimal sketch follows the reference configuration below).
If a new log-processing pipeline is needed later, it is added by declaring another output in the forwarding configuration, which keeps the individual pipelines isolated from one another.
Reference configuration:
[SERVICE]
# Flush
# =====
# Set an interval of seconds before to flush records to a destination
Flush 1
# Daemon
# ======
# Instruct Fluent Bit to run in foreground or background mode.
Daemon Off
# Log_Level
# =========
# Set the verbosity level of the service, values can be:
#
# - error
# - warning
# - info
# - debug
# - trace
#
# By default 'info' is set, that means it includes 'error' and 'warning'.
Log_Level info
# Parsers_File
# ============
# Specify an optional 'Parsers' configuration file
Parsers_File parsers.conf
Plugins_File plugins.conf
# HTTP Server
# ===========
# Enable/Disable the built-in HTTP Server for metrics
HTTP_Server Off
HTTP_Listen 0.0.0.0
HTTP_Port 2020
# [INPUT]
# Name cpu
# Tag cpu.local
# # Interval Sec
# # ====
# # Read interval (sec) Default: 1
# Interval_Sec 1
# [OUTPUT]
# Name stdout
# Match *
[INPUT]
Name forward
Listen 0.0.0.0
Port 24224
Buffer_Chunk_Size 2MB
Buffer_Max_Size 64MB
# Test message generator
# [INPUT]
# Name dummy
# Tag docker.fluentbitcollector
# Dummy {"log":"[Error ] this is dummy\n","stream":"stderr","attrs":{"tag":"docker.test.fluentbitforwoarder"},"time":"2020-11-07T10:59:53.399975037Z"}
# Rate 1
# [OUTPUT]
# Name stdout
# Match *
# Forward logs to the landing Fluentd
[OUTPUT]
Name forward
Match *
Host fluentd.host.addr
Port 24225
# Require_ack_response True
# Forward logs to the logexporter analysis component
[OUTPUT]
Name http
Match *
Host logexporter.host.addr
Port 12203
URI /logs
Format json
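For the scale-out case mentioned above, any Layer 4 (TCP) load balancer in front of several FluentBit replicas will do. As one possible sketch, assuming nginx built with the stream module and using placeholder hostnames:

# nginx.conf fragment: TCP load balancing across FluentBit forwarders
# (hostnames are placeholders for the real replicas)
stream {
    upstream fluentbit_forward {
        server fluentbit-1.internal:24224;
        server fluentbit-2.internal:24224;
    }
    server {
        listen 24224;
        proxy_pass fluentbit_forward;
    }
}

The Fluentd collectors would then point FLUENT_FOWARD_HOST at the load balancer instead of at a single forwarder.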
Log sink
For log storage we did not adopt elasticsearch or a similar stack. Instead, a single Fluentd instance collects everything and writes it to a local disk directory, split into separate files per day; a rotation script that runs daily then syncs the older log files to AWS S3 and deletes them from local disk (a sketch of such a script follows the reference configuration below).
Reference configuration:
<source>
@type forward
@id input1
@label @mainstream
port 24225
</source>
<filter **>
@type stdout
</filter>
<label @mainstream>
# Container logs collected from docker & swarm
<match docker.**>
@type copy
<store>
@type file
@id output_docker
path /data/docker/fluentd/log/fluentd.${$.attrs.tag}.*.log
append true
<format>
@type single_value
add_newline false
message_key log
</format>
<buffer time, $.attrs.tag>
@type memory
flush_at_shutdown true
flush_thread_count 4
flush_thread_interval 0.1
flush_thread_burst_interval 0.1
# flush_mode immediate
flush_mode interval
flush_interval 1s
</buffer>
</store>
# <store>
# @type stdout
# </store>
</match>
# k8s container logs
<match kubernetes.**>
@type copy
<store>
@type file
@id output_k8s1
path /data/docker/fluentd/log/fluentd.k8s.${$.kubernetes.labels.clustername}.${$.kubernetes.labels.servicename}.*.log
append true
<format>
@type single_value
add_newline false
message_key log
</format>
<buffer time, $.kubernetes.labels.servicename, $.kubernetes.labels.clustername>
@type memory
flush_at_shutdown true
flush_thread_count 4
flush_thread_interval 0.1
flush_thread_burst_interval 0.1
# flush_mode immediate
flush_mode interval
flush_interval 1s
</buffer>
</store>
# <store>
# @type stdout
# </store>
</match>
# kubelet logs
<match kubelet>
@type copy
<store>
@type file
@id output_kubelet
path /data/docker/fluentd/log/fluentd.${tag}.${$._HOSTNAME}.*.log
append true
<format>
@type single_value
add_newline true
message_key MESSAGE
</format>
<buffer time, tag, $._HOSTNAME>
@type memory
flush_at_shutdown true
flush_thread_count 4
flush_thread_interval 0.1
flush_thread_burst_interval 0.1
# flush_mode immediate
flush_mode interval
flush_interval 1s
</buffer>
</store>
# <store>
# @type stdout
# </store>
</match>
# Default catch-all rule
<match **>
@type file
@id output1
path /data/docker/fluentd/log/data.*.log
append true
<buffer time>
@type memory
flush_at_shutdown true
flush_thread_count 4
flush_thread_interval 0.1
flush_thread_burst_interval 0.1
# flush_mode immediate
flush_mode interval
flush_interval 1s
</buffer>
</match>
</label>
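The rotation script itself is not reproduced here. A minimal sketch of what it does, assuming the aws CLI is installed and using a placeholder bucket name and retention period, might look like this:

#!/usr/bin/env bash
# Daily rotation sketch (illustrative, not the production script):
# upload closed log files to S3, then delete local files past the retention window.
set -euo pipefail

LOG_DIR=/data/docker/fluentd/log          # matches the Fluentd output path above
S3_PREFIX=s3://example-log-bucket/fluentd # placeholder bucket/prefix
RETAIN_DAYS=3                             # placeholder local retention

# Upload files that have not been written to for more than a day.
find "$LOG_DIR" -type f -name '*.log' -mtime +0 -print0 |
  while IFS= read -r -d '' f; do
    aws s3 cp "$f" "$S3_PREFIX/$(basename "$f")"
  done

# Remove local copies older than the retention window.
find "$LOG_DIR" -type f -name '*.log' -mtime +"$RETAIN_DAYS" -delete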
Log analysis
Our log analysis component is written in golang. It receives logs over HTTP, scans them for error-level markers, writes the error entries to a database, and pushes the resulting metrics to prometheus for alerting.
The collected logs are not limited to our own business logs; they can also include logs from other infrastructure components, so a component we develop ourselves gives us more flexibility and a better fit for our own requirements.
The error logs written to the database can be queried quickly through a frontend tool we built, and the pipeline can even be hooked up to a DingTalk bot so that, once an alert fires, the matching error logs are posted automatically to the alert group for the on-call engineer to handle.
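As an illustration of the shape of that component rather than its actual implementation (the error marker, the metric name, and the database call below are placeholders), a minimal Go sketch of the /logs endpoint that FluentBit's http output posts to could look like this:

// logexporter_sketch.go: minimal sketch of the /logs endpoint.
// Assumes FluentBit's http output (Format json) posts a JSON array of records.
package main

import (
    "encoding/json"
    "log"
    "net/http"
    "strings"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// Counter exposed to Prometheus; alert rules can fire on its rate.
var errorLines = prometheus.NewCounter(prometheus.CounterOpts{
    Name: "logexporter_error_lines_total",
    Help: "Number of error-level log lines observed.",
})

func handleLogs(w http.ResponseWriter, r *http.Request) {
    var records []map[string]interface{}
    if err := json.NewDecoder(r.Body).Decode(&records); err != nil {
        http.Error(w, err.Error(), http.StatusBadRequest)
        return
    }
    for _, rec := range records {
        line, _ := rec["log"].(string)
        if strings.Contains(line, "[Error") { // placeholder error-level marker
            errorLines.Inc()
            // placeholder: write rec to the database for the query frontend / DingTalk alerts
        }
    }
    w.WriteHeader(http.StatusOK)
}

func main() {
    prometheus.MustRegister(errorLines)
    http.HandleFunc("/logs", handleLogs)          // matches URI /logs in the FluentBit output
    http.Handle("/metrics", promhttp.Handler())   // scraped by Prometheus
    log.Fatal(http.ListenAndServe(":12203", nil)) // matches Port 12203 in the FluentBit output
}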
