Class: OsCtld::Container::Recovery

Inherits:
Object
  • Object
show all
Includes:
OsCtl::Lib::Utils::Log, OsCtl::Lib::Utils::System
Defined in:
lib/osctld/container/recovery.rb

Overview

Contains methods for working with unresponsive or dead containers

Defined Under Namespace

Classes: RouteList

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ct) ⇒ Recovery

Returns a new instance of Recovery.

Parameters:



10
11
12
# File 'lib/osctld/container/recovery.rb', line 10

# Create a recovery helper bound to a single container.
#
# @param ct the container to operate on; it is stored in @ct
#   (presumably an OsCtld::Container -- confirm against callers)
def initialize(ct)
  @ct = ct
end

Instance Attribute Details

#ct ⇒ Object (readonly, protected)

Returns the value of attribute ct.



193
194
195
# File 'lib/osctld/container/recovery.rb', line 193

# @return the value stored in @ct, i.e. the container given to the constructor
def ct
  @ct
end

Instance Method Details

#cleanup_cgroups ⇒ Object

Remove left-over cgroups in container path



102
103
104
105
106
# File 'lib/osctld/container/recovery.rb', line 102

# Remove the container's left-over cgroups.
#
# For each of the "payload", "monitor" and "pivot" prefixes, in that order,
# the path "<ct.cgroup_path>/lxc.<prefix>.<ct.id>" is handed to
# CGroup.rmpath_all.
#
# @return the result of the last CGroup.rmpath_all call
def cleanup_cgroups
  outcomes = %w[payload monitor pivot].map do |prefix|
    CGroup.rmpath_all(File.join(ct.cgroup_path, "lxc.#{prefix}.#{ct.id}"))
  end

  # Keep returning what the final removal returned
  outcomes.last
end

#cleanup_netifs {|veth, routes| ... } ⇒ Object

Find and remove left-over network interfaces used by the container

Yield Parameters:



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/osctld/container/recovery.rb', line 111

# Find and remove left-over network interfaces used by the container.
#
# First, for IP versions 4 and 6, every route of the container's :routed
# interfaces is looked up in a RouteList to learn which veth (if any)
# carries it. Then each veth found this way is either left alone, when some
# container returned by DB::Containers.get has an interface with that veth
# name, or deleted together with its "ifb"-prefixed counterpart.
#
# @yieldparam veth the name of an interface that is about to be removed
# @yieldparam routes [Array] the routes that were found on that interface
def cleanup_netifs
  # veth name => routes found on it
  routes_by_veth = {}

  [4, 6].each do |ip_v|
    route_list = RouteList.new(ip_v)

    ct.netifs.each do |netif|
      next unless netif.type == :routed

      netif.routes.each_version(ip_v) do |route|
        veth = route_list.veth_of(route)
        next unless veth

        log(:info, "Found route #{route.addr.to_string} on #{veth}")
        (routes_by_veth[veth] ||= []) << route
      end
    end
  end

  routes_by_veth.each do |veth, veth_routes|
    # Is the interface claimed by any known container?
    owner = DB::Containers.get.detect do |candidate|
      candidate.netifs.detect do |netif|
        netif.respond_to?(:veth) && netif.veth == veth
      end
    end

    if owner
      log(:info, "Interface #{veth} is used by container #{owner.ident}")
      next
    end

    yield(veth, veth_routes) if block_given?
    log(:info, "Removing #{veth}")
    syscmd("ip link delete #{veth}")

    # Remove also its IFB counterpart for shaping if it exists
    syscmd("ip link delete ifb#{veth}", valid_rcs: [1])
  end
end

#cleanup_or_taint ⇒ Boolean

Cleanup after the container or put the container into an error state

Returns:

  • (Boolean)


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/osctld/container/recovery.rb', line 80

# Clean up after the container, or mark it as broken when that fails.
#
# cleanup_cgroups and cleanup_netifs are both attempted, in that order, even
# when the first one raises. Each StandardError is logged as a warning
# rather than propagated. When at least one step failed, the container's
# state is set to :error.
#
# @return [Boolean] true when both steps finished without raising
def cleanup_or_taint
  steps = { 'cgroups' => :cleanup_cgroups, 'netifs' => :cleanup_netifs }

  failed = steps.reject do |label, step|
    begin
      send(step)
      true
    rescue StandardError => e
      log(:warn, "Failed to cleanup #{label}: #{e.class}: #{e.message}")
      false
    end
  end

  return true if failed.empty?

  ct.state = :error
  false
end

#kill_all(signal: 'KILL') ⇒ Object

Kill all processes found in the container's cgroup with signal

Parameters:

  • signal (String) (defaults to: 'KILL')


53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/osctld/container/recovery.rb', line 53

# Send a signal to every process that belongs to the container.
#
# A process belongs to the container when its ct_id is not nil, its first
# element equals the container's pool name and its second element equals
# the container's id. The check is made while the process list is built and
# once more right before each signal is sent.
#
# @param signal [String] signal name passed to Process.kill
def kill_all(signal: 'KILL')
  belongs_here = lambda do |process|
    owner = process.ct_id
    !owner.nil? && owner[0] == ct.pool.name && owner[1] == ct.id
  end

  victims = OsCtl::Lib::ProcessList.new { |process| belongs_here.call(process) }
  log(:info, "#{victims.length} processes to kill")

  victims.each do |process|
    # Double check
    next unless belongs_here.call(process)

    log(:info, "kill -SIG#{signal} #{process.pid} #{process.name}")

    begin
      Process.kill(signal, process.pid)
    rescue Errno::ESRCH
      # The process is already gone, there is nothing left to signal
    end
  end
end

#log_type ⇒ Object



154
155
156
# File 'lib/osctld/container/recovery.rb', line 154

# Build the label "recover=<pool name>:<container id>" for this container.
#
# @return [String]
def log_type
  pool_name = ct.pool.name
  ctid = ct.id

  "recover=#{pool_name}:#{ctid}"
end

#recover_state ⇒ Object

Rediscover container state

If the container is found dead, appropriate actions and hooks for container stop are run.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/osctld/container/recovery.rb', line 18

# Rediscover the container's state and react to a change.
#
# The state stored on the container (ct.state) is compared with the state
# it reports now (ct.current_state):
#
# - both are equal: nothing is done;
# - the container is now :stopped: its network interfaces are taken down,
#   the AppArmor namespace and profile are removed when AppArmor is enabled,
#   ct.stopped and the :post_stop hook are run, and the :aborting and
#   :stopped states are reported to Eventd;
# - any other new state: only that state is reported to Eventd.
def recover_state
  orig_state = ct.state
  current_state = ct.current_state

  return if orig_state == current_state

  if current_state == :stopped
    # Put all network interfaces down
    ct.netifs.take_down

    # Unload AppArmor profile and destroy namespace
    if AppArmor.enabled?
      ct.apparmor.destroy_namespace
      ct.apparmor.unload_profile
    end

    ct.stopped

    # User-defined hook
    Hook.run(ct, :post_stop)

    # Announce the change first as :aborting, that will cause a waiting
    # osctl ct start to give it up
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :aborting)
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: :stopped)

  else
    # Announce the change. The state to report is the one detected above;
    # this method has no `change` object to take it from.
    # NOTE(review): the state stored on the container is not assigned in
    # this branch -- confirm that ct.current_state or a caller updates it.
    Eventd.report(:state, pool: ct.pool.name, id: ct.id, state: current_state)
  end
end