Class: OsCtld::Container::Recovery

Inherits:
Object
  • Object
show all
Includes:
OsCtl::Lib::Utils::Log, OsCtl::Lib::Utils::System
Defined in:
lib/osctld/container/recovery.rb

Overview

Contains methods for working with unresponsive or dead containers

Defined Under Namespace

Classes: RouteList

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ct) ⇒ Recovery

Returns a new instance of Recovery.

Parameters:



10
11
12
# File 'lib/osctld/container/recovery.rb', line 10

# Remember the container that the recovery methods will operate on.
#
# @param ct the container object; the other methods read its pool, id,
#   state, cgroup path and network interfaces through the `ct` reader
def initialize(ct)
  @ct = ct
end

Instance Attribute Details

#ct ⇒ Object (readonly, protected)

Returns the value of attribute ct.



195
196
197
# File 'lib/osctld/container/recovery.rb', line 195

# Reader for the container handed to the constructor.
#
# @return the object stored in @ct
def ct
  @ct
end

Instance Method Details

#cleanup_cgroups ⇒ Object

Remove left-over cgroups in container path



103
104
105
106
107
# File 'lib/osctld/container/recovery.rb', line 103

# Remove left-over cgroups in the container's cgroup path.
#
# Three groups are removed, in this order: lxc.payload.<id>,
# lxc.monitor.<id> and lxc.pivot.<id>, each located directly under
# ct.cgroup_path.
#
# @return the result of the last CGroup.rmpath_all call
def cleanup_cgroups
  leftovers = [
    "lxc.payload.#{ct.id}",
    "lxc.monitor.#{ct.id}",
    "lxc.pivot.#{ct.id}"
  ]

  # map + last keeps the return value of the final removal
  leftovers.map do |group|
    CGroup.rmpath_all(File.join(ct.cgroup_path, group))
  end.last
end

#cleanup_netifs {|veth, routes| ... } ⇒ Object

Find and remove left-over network interfaces used by the container

Yield Parameters:



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/osctld/container/recovery.rb', line 112

# Find and remove left-over network interfaces used by the container.
#
# For IPv4 and IPv6, every route of every `:routed` interface of the
# container is looked up in a RouteList to find the veth it is on. Each veth
# found this way is deleted with `ip link delete`, together with its
# `ifb<veth>` counterpart, unless some container known to DB::Containers
# has an interface with that veth name.
#
# @yieldparam veth [String] name of the interface about to be removed
# @yieldparam routes [Array] container routes that were found on that veth
def cleanup_netifs
  # veth name => routes found on it
  routes_by_veth = {}

  [4, 6].each do |ip_v|
    route_list = RouteList.new(ip_v)

    ct.netifs.each do |netif|
      next unless netif.type == :routed

      netif.routes.each_version(ip_v) do |route|
        veth = route_list.veth_of(route)
        next unless veth

        log(:info, "Found route #{route.addr.to_string} on #{veth}")
        (routes_by_veth[veth] ||= []) << route
      end
    end
  end

  routes_by_veth.each do |veth, routes|
    # First container that has an interface with this veth name, if any
    owner = DB::Containers.get.detect do |other|
      other.netifs.detect do |netif|
        netif.respond_to?(:veth) && netif.veth == veth
      end
    end

    if owner
      log(:info, "Interface #{veth} is used by container #{owner.ident}")
      next
    end

    # Let the caller see the interface before it disappears
    yield(veth, routes) if block_given?

    log(:info, "Removing #{veth}")
    syscmd("ip link delete #{veth}")

    # Remove also its IFB counterpart for shaping if it exists
    syscmd("ip link delete ifb#{veth}", valid_rcs: [1])
  end
end

#cleanup_or_taint ⇒ Boolean

Cleanup after the container or put the container into an error state

Returns:

  • (Boolean)


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/osctld/container/recovery.rb', line 81

# Cleanup after the container or put the container into an error state.
#
# Both cleanup steps are always attempted; a StandardError raised by one is
# logged as a warning and does not prevent the other from running. If either
# step failed, the container's state is set to :error.
#
# @return [Boolean] true when both steps succeeded, false otherwise
def cleanup_or_taint
  clean = true

  begin
    cleanup_cgroups
  rescue StandardError => e
    clean = false
    log(:warn, "Failed to cleanup cgroups: #{e.class}: #{e.message}")
  end

  begin
    cleanup_netifs
  rescue StandardError => e
    clean = false
    log(:warn, "Failed to cleanup netifs: #{e.class}: #{e.message}")
  end

  ct.state = :error unless clean
  clean
end

#kill_all(signal: 'KILL') ⇒ Object

Kill all processes found in the container’s cgroup with signal

Parameters:

  • signal (String) (defaults to: 'KILL')


53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/osctld/container/recovery.rb', line 53

# Kill all processes found in the container's cgroup with signal.
#
# Processes are selected through OsCtl::Lib::ProcessList by comparing the
# pair returned by each process's `ct_id` with the container's pool name
# and id. Processes that no longer exist when signalled are skipped.
#
# @param signal [String] signal name passed to Process.kill
def kill_all(signal: 'KILL')
  victims = OsCtl::Lib::ProcessList.new do |process|
    owner = process.ct_id
    !owner.nil? && owner[0] == ct.pool.name && owner[1] == ct.id
  end

  log(:info, "#{victims.length} processes to kill")

  victims.each do |process|
    # Double check that the process still reports this container
    owner = process.ct_id
    next if owner.nil? || owner[0] != ct.pool.name || owner[1] != ct.id

    log(:info, "kill -SIG#{signal} #{process.pid} #{process.name}")

    begin
      Process.kill(signal, process.pid)
    rescue Errno::ESRCH
      # The process exited in the meantime, nothing left to do
    end
  end
end

#log_type ⇒ Object



155
156
157
# File 'lib/osctld/container/recovery.rb', line 155

# Tag identifying this recovery in the form `recover=<pool name>:<id>`.
# NOTE(review): presumably picked up by the Log mixin to prefix messages
# written with `log` - confirm against OsCtl::Lib::Utils::Log.
#
# @return [String]
def log_type
  "recover=#{ct.pool.name}:#{ct.id}"
end

#recover_state ⇒ Object

Rediscover container state

If the container is found dead, appropriate actions and hooks for container stop are run.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/osctld/container/recovery.rb', line 18

# Rediscover container state
#
# Compares the state recorded on the container (`ct.state`) with the one it
# reports now (`ct.current_state`):
#
# - when they match, nothing is done;
# - when the container is found stopped, the actions and hooks for container
#   stop are run and the change is announced through Eventd;
# - for any other state, only the newly discovered state is announced.
#
# @return [nil] when the state has not changed, otherwise the result of the
#   last Eventd.report call
def recover_state
  orig_state = ct.state
  current_state = ct.current_state

  # Recorded state already matches, nothing to recover
  return if orig_state == current_state

  if current_state == :stopped
    # Put all network interfaces down
    ct.netifs.take_down

    # Unload AppArmor profile and destroy namespace
    if AppArmor.enabled?
      ct.apparmor.destroy_namespace
      ct.apparmor.unload_profile
    end

    ct.stopped

    # User-defined hook
    Hook.run(ct, :post_stop)

    # Announce the change first as :aborting, that will cause a waiting
    # osctl ct start to give it up
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :aborting)
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :stopped)

  else
    # Announce the change; the state to report is the one just discovered
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: current_state)
  end
end