Class: OsCtld::Container::Recovery

Inherits:
Object
  • Object
show all
Includes:
OsCtl::Lib::Utils::Log, OsCtl::Lib::Utils::System
Defined in:
lib/osctld/container/recovery.rb

Overview

Contains methods for working with unresponsive or dead containers

Defined Under Namespace

Classes: RouteList

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ct) ⇒ Recovery

Returns a new instance of Recovery.

Parameters:



10
11
12
# File 'lib/osctld/container/recovery.rb', line 10

# Create a recovery helper bound to a single container.
#
# @param ct the container to operate on; it is stored in @ct and read back
#   through the `ct` reader by the other methods (the concrete type is not
#   visible here — presumably an OsCtld::Container, TODO confirm)
def initialize(ct)
  @ct = ct
end

Instance Attribute Details

#ct ⇒ Object (readonly, protected)

Returns the value of attribute ct.



188
189
190
# File 'lib/osctld/container/recovery.rb', line 188

# Reader for the container this helper was constructed with.
#
# @return the object stored in @ct by the constructor
def ct
  @ct
end

Instance Method Details

#cleanup_cgroups ⇒ Object

Remove left-over cgroups in container path



100
101
102
103
104
# File 'lib/osctld/container/recovery.rb', line 100

# Remove left-over cgroups in the container's cgroup path.
#
# LXC's payload, monitor and pivot groups for this container id are removed,
# in that order, by handing each path to CGroup.rmpath_all.
#
# @return whatever CGroup.rmpath_all returned for the last (pivot) group
def cleanup_cgroups
  group_names = %w[payload monitor pivot].map { |kind| "lxc.#{kind}.#{ct.id}" }

  group_names.map do |group_name|
    CGroup.rmpath_all(File.join(ct.cgroup_path, group_name))
  end.last
end

#cleanup_netifs {|veth, routes| ... } ⇒ Object

Find and remove left-over network interfaces used by the container

Yield Parameters:



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/osctld/container/recovery.rb', line 109

# Find and remove left-over network interfaces used by the container.
#
# For IPv4 and IPv6 in turn, every route configured on the container's
# routed interfaces is looked up in a RouteList to learn which veth it is
# on. Each veth found this way is then checked against the interfaces of all
# containers returned by DB::Containers.get: a veth that some container
# reports as its own is left alone, any other is deleted with
# `ip link delete`.
#
# @yieldparam veth name of the interface about to be removed, as returned by
#   RouteList#veth_of (presumably a String — TODO confirm)
# @yieldparam routes [Array] the container's routes that were found on veth
def cleanup_netifs
  # veth name => routes found on it
  routes_by_veth = {}

  [4, 6].each do |ip_v|
    route_list = RouteList.new(ip_v)

    ct.netifs.each do |netif|
      next unless netif.type == :routed

      netif.routes.each_version(ip_v) do |route|
        veth = route_list.veth_of(route)
        next unless veth

        log(:info, "Found route #{route.addr.to_string} on #{veth}")
        (routes_by_veth[veth] ||= []) << route
      end
    end
  end

  routes_by_veth.each do |veth, routes|
    # First container that has an interface backed by this veth, if any
    owner = DB::Containers.get.detect do |other_ct|
      other_ct.netifs.detect do |netif|
        netif.respond_to?(:veth) && netif.veth == veth
      end
    end

    if owner
      log(:info, "Interface #{veth} is used by container #{owner.ident}")
      next
    end

    # Let the caller see the interface before it disappears
    yield(veth, routes) if block_given?
    log(:info, "Removing #{veth}")
    syscmd("ip link delete #{veth}")
  end
end

#cleanup_or_taint ⇒ Boolean

Cleanup after the container or put the container into an error state

Returns:

  • (Boolean)


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/osctld/container/recovery.rb', line 78

# Cleanup after the container or put the container into an error state.
#
# Both cleanup steps are always attempted; a failure in one is logged as a
# warning and does not prevent the other from running. If either step
# raised, the container's state is set to :error.
#
# @return [Boolean] true when both steps succeeded, false otherwise
def cleanup_or_taint
  cgroups_ok =
    begin
      cleanup_cgroups
      true
    rescue StandardError => e
      log(:warn, "Failed to cleanup cgroups: #{e.class}: #{e.message}")
      false
    end

  netifs_ok =
    begin
      cleanup_netifs
      true
    rescue StandardError => e
      log(:warn, "Failed to cleanup netifs: #{e.class}: #{e.message}")
      false
    end

  clean = cgroups_ok && netifs_ok
  ct.state = :error unless clean
  clean
end

#kill_all(signal: 'KILL') ⇒ Object

Kill all processes found in the container's cgroup with signal

Parameters:

  • signal (String) (defaults to: 'KILL')


51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/osctld/container/recovery.rb', line 51

# Kill all processes found in the container's cgroup with signal.
#
# The process list is filtered down to processes whose ct_id is this
# container's [pool name, id] pair; each one is checked again right before
# the signal is sent.
#
# @param signal [String] signal name without the "SIG" prefix
def kill_all(signal: 'KILL')
  # True only for processes that belong to this container
  owned_by_ct = proc do |process|
    owner = process.ct_id

    if owner.nil?
      false
    else
      owner[0] == ct.pool.name && owner[1] == ct.id
    end
  end

  victims = OsCtl::Lib::ProcessList.new(&owned_by_ct)
  log(:info, "#{victims.length} processes to kill")

  victims.each do |process|
    # Double check
    next unless owned_by_ct.call(process)

    log(:info, "kill -SIG#{signal} #{process.pid} #{process.name}")

    begin
      Process.kill(signal, process.pid)
    rescue Errno::ESRCH
      # The process is already gone, nothing to do
    end
  end
end

#log_type ⇒ Object



149
150
151
# File 'lib/osctld/container/recovery.rb', line 149

# Build the log tag for this helper from the container's pool name and id.
#
# @return [String] "recover=<pool name>:<container id>"
def log_type
  pool_name = ct.pool.name
  ctid = ct.id

  "recover=#{pool_name}:#{ctid}"
end

#recover_state ⇒ Object

Rediscover container state

If the container is found dead, appropriate actions and hooks for container stop are run.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/osctld/container/recovery.rb', line 18

# Rediscover container state
#
# The state recorded on the container (`ct.state`) is compared with the state
# reported by `ct.current_state`:
#
# - when they agree, nothing is done;
# - when the container is found dead (:stopped), the actions and hooks for
#   container stop are run and the change is announced;
# - for any other change, the newly discovered state is announced.
#
# @return [nil] when the state has not changed, otherwise the result of the
#   last Eventd.report call
def recover_state
  orig_state = ct.state
  current_state = ct.current_state

  return if orig_state == current_state

  if current_state == :stopped
    # Put all network interfaces down
    ct.netifs.take_down

    # Unload AppArmor profile and destroy namespace
    ct.apparmor.destroy_namespace
    ct.apparmor.unload_profile

    ct.stopped

    # User-defined hook
    Container::Hook.run(ct, :post_stop)

    # Announce the change first as :aborting, that will cause a waiting
    # osctl ct start to give it up
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :aborting)
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :stopped)

  else
    # Announce the change. The state to report is the one just discovered;
    # there is no `change` object in this method to read it from.
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: current_state)
  end
end