mirror of
https://github.com/ceph/ceph-csi.git
synced 2025-06-13 10:33:35 +00:00
build: move e2e dependencies into e2e/go.mod
Several packages are only used while running the e2e suite. These packages are less important to update, as the they can not influence the final executable that is part of the Ceph-CSI container-image. By moving these dependencies out of the main Ceph-CSI go.mod, it is easier to identify if a reported CVE affects Ceph-CSI, or only the testing (like most of the Kubernetes CVEs). Signed-off-by: Niels de Vos <ndevos@ibm.com>
This commit is contained in:
committed by
mergify[bot]
parent
15da101b1b
commit
bec6090996
4
e2e/vendor/github.com/opencontainers/go-digest/.mailmap
generated
vendored
Normal file
4
e2e/vendor/github.com/opencontainers/go-digest/.mailmap
generated
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
Aaron Lehmann <aaronl@vitelus.com> <aaron.lehmann@docker.com>
|
||||
Derek McGowan <derek@mcg.dev> <derek@mcgstyle.net>
|
||||
Stephen J Day <stephen.day@docker.com> <stevvooe@users.noreply.github.com>
|
||||
Haibing Zhou <zhouhaibing089@gmail.com>
|
28
e2e/vendor/github.com/opencontainers/go-digest/.pullapprove.yml
generated
vendored
Normal file
28
e2e/vendor/github.com/opencontainers/go-digest/.pullapprove.yml
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
version: 2
|
||||
|
||||
requirements:
|
||||
signed_off_by:
|
||||
required: true
|
||||
|
||||
always_pending:
|
||||
title_regex: '^WIP'
|
||||
explanation: 'Work in progress...'
|
||||
|
||||
group_defaults:
|
||||
required: 2
|
||||
approve_by_comment:
|
||||
enabled: true
|
||||
approve_regex: '^LGTM'
|
||||
reject_regex: '^Rejected'
|
||||
reset_on_push:
|
||||
enabled: true
|
||||
author_approval:
|
||||
ignored: true
|
||||
conditions:
|
||||
branches:
|
||||
- master
|
||||
|
||||
groups:
|
||||
go-digest:
|
||||
teams:
|
||||
- go-digest-maintainers
|
5
e2e/vendor/github.com/opencontainers/go-digest/.travis.yml
generated
vendored
Normal file
5
e2e/vendor/github.com/opencontainers/go-digest/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
language: go
|
||||
go:
|
||||
- 1.12.x
|
||||
- 1.13.x
|
||||
- master
|
72
e2e/vendor/github.com/opencontainers/go-digest/CONTRIBUTING.md
generated
vendored
Normal file
72
e2e/vendor/github.com/opencontainers/go-digest/CONTRIBUTING.md
generated
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
# Contributing to Docker open source projects
|
||||
|
||||
Want to hack on this project? Awesome! Here are instructions to get you started.
|
||||
|
||||
This project is a part of the [Docker](https://www.docker.com) project, and follows
|
||||
the same rules and principles. If you're already familiar with the way
|
||||
Docker does things, you'll feel right at home.
|
||||
|
||||
Otherwise, go read Docker's
|
||||
[contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md),
|
||||
[issue triaging](https://github.com/docker/docker/blob/master/project/ISSUE-TRIAGE.md),
|
||||
[review process](https://github.com/docker/docker/blob/master/project/REVIEWING.md) and
|
||||
[branches and tags](https://github.com/docker/docker/blob/master/project/BRANCHES-AND-TAGS.md).
|
||||
|
||||
For an in-depth description of our contribution process, visit the
|
||||
contributors guide: [Understand how to contribute](https://docs.docker.com/opensource/workflow/make-a-contribution/)
|
||||
|
||||
### Sign your work
|
||||
|
||||
The sign-off is a simple line at the end of the explanation for the patch. Your
|
||||
signature certifies that you wrote the patch or otherwise have the right to pass
|
||||
it on as an open-source patch. The rules are pretty simple: if you can certify
|
||||
the below (from [developercertificate.org](http://developercertificate.org/)):
|
||||
|
||||
```
|
||||
Developer Certificate of Origin
|
||||
Version 1.1
|
||||
|
||||
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
|
||||
1 Letterman Drive
|
||||
Suite D4700
|
||||
San Francisco, CA, 94129
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
|
||||
Developer's Certificate of Origin 1.1
|
||||
|
||||
By making a contribution to this project, I certify that:
|
||||
|
||||
(a) The contribution was created in whole or in part by me and I
|
||||
have the right to submit it under the open source license
|
||||
indicated in the file; or
|
||||
|
||||
(b) The contribution is based upon previous work that, to the best
|
||||
of my knowledge, is covered under an appropriate open source
|
||||
license and I have the right under that license to submit that
|
||||
work with modifications, whether created in whole or in part
|
||||
by me, under the same open source license (unless I am
|
||||
permitted to submit under a different license), as indicated
|
||||
in the file; or
|
||||
|
||||
(c) The contribution was provided directly to me by some other
|
||||
person who certified (a), (b) or (c) and I have not modified
|
||||
it.
|
||||
|
||||
(d) I understand and agree that this project and the contribution
|
||||
are public and that a record of the contribution (including all
|
||||
personal information I submit with it, including my sign-off) is
|
||||
maintained indefinitely and may be redistributed consistent with
|
||||
this project or the open source license(s) involved.
|
||||
```
|
||||
|
||||
Then you just add a line to every git commit message:
|
||||
|
||||
Signed-off-by: Joe Smith <joe.smith@email.com>
|
||||
|
||||
Use your real name (sorry, no pseudonyms or anonymous contributions.)
|
||||
|
||||
If you set your `user.name` and `user.email` git configs, you can sign your
|
||||
commit automatically with `git commit -s`.
|
192
e2e/vendor/github.com/opencontainers/go-digest/LICENSE
generated
vendored
Normal file
192
e2e/vendor/github.com/opencontainers/go-digest/LICENSE
generated
vendored
Normal file
@ -0,0 +1,192 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
https://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2019, 2020 OCI Contributors
|
||||
Copyright 2016 Docker, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
https://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
425
e2e/vendor/github.com/opencontainers/go-digest/LICENSE.docs
generated
vendored
Normal file
425
e2e/vendor/github.com/opencontainers/go-digest/LICENSE.docs
generated
vendored
Normal file
@ -0,0 +1,425 @@
|
||||
Attribution-ShareAlike 4.0 International
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
||||
does not provide legal services or legal advice. Distribution of
|
||||
Creative Commons public licenses does not create a lawyer-client or
|
||||
other relationship. Creative Commons makes its licenses and related
|
||||
information available on an "as-is" basis. Creative Commons gives no
|
||||
warranties regarding its licenses, any material licensed under their
|
||||
terms and conditions, or any related information. Creative Commons
|
||||
disclaims all liability for damages resulting from their use to the
|
||||
fullest extent possible.
|
||||
|
||||
Using Creative Commons Public Licenses
|
||||
|
||||
Creative Commons public licenses provide a standard set of terms and
|
||||
conditions that creators and other rights holders may use to share
|
||||
original works of authorship and other material subject to copyright
|
||||
and certain other rights specified in the public license below. The
|
||||
following considerations are for informational purposes only, are not
|
||||
exhaustive, and do not form part of our licenses.
|
||||
|
||||
Considerations for licensors: Our public licenses are
|
||||
intended for use by those authorized to give the public
|
||||
permission to use material in ways otherwise restricted by
|
||||
copyright and certain other rights. Our licenses are
|
||||
irrevocable. Licensors should read and understand the terms
|
||||
and conditions of the license they choose before applying it.
|
||||
Licensors should also secure all rights necessary before
|
||||
applying our licenses so that the public can reuse the
|
||||
material as expected. Licensors should clearly mark any
|
||||
material not subject to the license. This includes other CC-
|
||||
licensed material, or material used under an exception or
|
||||
limitation to copyright. More considerations for licensors:
|
||||
wiki.creativecommons.org/Considerations_for_licensors
|
||||
|
||||
Considerations for the public: By using one of our public
|
||||
licenses, a licensor grants the public permission to use the
|
||||
licensed material under specified terms and conditions. If
|
||||
the licensor's permission is not necessary for any reason--for
|
||||
example, because of any applicable exception or limitation to
|
||||
copyright--then that use is not regulated by the license. Our
|
||||
licenses grant only permissions under copyright and certain
|
||||
other rights that a licensor has authority to grant. Use of
|
||||
the licensed material may still be restricted for other
|
||||
reasons, including because others have copyright or other
|
||||
rights in the material. A licensor may make special requests,
|
||||
such as asking that all changes be marked or described.
|
||||
Although not required by our licenses, you are encouraged to
|
||||
respect those requests where reasonable. More_considerations
|
||||
for the public:
|
||||
wiki.creativecommons.org/Considerations_for_licensees
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Attribution-ShareAlike 4.0 International Public
|
||||
License
|
||||
|
||||
By exercising the Licensed Rights (defined below), You accept and agree
|
||||
to be bound by the terms and conditions of this Creative Commons
|
||||
Attribution-ShareAlike 4.0 International Public License ("Public
|
||||
License"). To the extent this Public License may be interpreted as a
|
||||
contract, You are granted the Licensed Rights in consideration of Your
|
||||
acceptance of these terms and conditions, and the Licensor grants You
|
||||
such rights in consideration of benefits the Licensor receives from
|
||||
making the Licensed Material available under these terms and
|
||||
conditions.
|
||||
|
||||
|
||||
Section 1 -- Definitions.
|
||||
|
||||
a. Adapted Material means material subject to Copyright and Similar
|
||||
Rights that is derived from or based upon the Licensed Material
|
||||
and in which the Licensed Material is translated, altered,
|
||||
arranged, transformed, or otherwise modified in a manner requiring
|
||||
permission under the Copyright and Similar Rights held by the
|
||||
Licensor. For purposes of this Public License, where the Licensed
|
||||
Material is a musical work, performance, or sound recording,
|
||||
Adapted Material is always produced where the Licensed Material is
|
||||
synched in timed relation with a moving image.
|
||||
|
||||
b. Adapter's License means the license You apply to Your Copyright
|
||||
and Similar Rights in Your contributions to Adapted Material in
|
||||
accordance with the terms and conditions of this Public License.
|
||||
|
||||
c. BY-SA Compatible License means a license listed at
|
||||
creativecommons.org/compatiblelicenses, approved by Creative
|
||||
Commons as essentially the equivalent of this Public License.
|
||||
|
||||
d. Copyright and Similar Rights means copyright and/or similar rights
|
||||
closely related to copyright including, without limitation,
|
||||
performance, broadcast, sound recording, and Sui Generis Database
|
||||
Rights, without regard to how the rights are labeled or
|
||||
categorized. For purposes of this Public License, the rights
|
||||
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
||||
Rights.
|
||||
|
||||
e. Effective Technological Measures means those measures that, in the
|
||||
absence of proper authority, may not be circumvented under laws
|
||||
fulfilling obligations under Article 11 of the WIPO Copyright
|
||||
Treaty adopted on December 20, 1996, and/or similar international
|
||||
agreements.
|
||||
|
||||
f. Exceptions and Limitations means fair use, fair dealing, and/or
|
||||
any other exception or limitation to Copyright and Similar Rights
|
||||
that applies to Your use of the Licensed Material.
|
||||
|
||||
g. License Elements means the license attributes listed in the name
|
||||
of a Creative Commons Public License. The License Elements of this
|
||||
Public License are Attribution and ShareAlike.
|
||||
|
||||
h. Licensed Material means the artistic or literary work, database,
|
||||
or other material to which the Licensor applied this Public
|
||||
License.
|
||||
|
||||
i. Licensed Rights means the rights granted to You subject to the
|
||||
terms and conditions of this Public License, which are limited to
|
||||
all Copyright and Similar Rights that apply to Your use of the
|
||||
Licensed Material and that the Licensor has authority to license.
|
||||
|
||||
j. Licensor means the individual(s) or entity(ies) granting rights
|
||||
under this Public License.
|
||||
|
||||
k. Share means to provide material to the public by any means or
|
||||
process that requires permission under the Licensed Rights, such
|
||||
as reproduction, public display, public performance, distribution,
|
||||
dissemination, communication, or importation, and to make material
|
||||
available to the public including in ways that members of the
|
||||
public may access the material from a place and at a time
|
||||
individually chosen by them.
|
||||
|
||||
l. Sui Generis Database Rights means rights other than copyright
|
||||
resulting from Directive 96/9/EC of the European Parliament and of
|
||||
the Council of 11 March 1996 on the legal protection of databases,
|
||||
as amended and/or succeeded, as well as other essentially
|
||||
equivalent rights anywhere in the world.
|
||||
|
||||
m. You means the individual or entity exercising the Licensed Rights
|
||||
under this Public License. Your has a corresponding meaning.
|
||||
|
||||
|
||||
Section 2 -- Scope.
|
||||
|
||||
a. License grant.
|
||||
|
||||
1. Subject to the terms and conditions of this Public License,
|
||||
the Licensor hereby grants You a worldwide, royalty-free,
|
||||
non-sublicensable, non-exclusive, irrevocable license to
|
||||
exercise the Licensed Rights in the Licensed Material to:
|
||||
|
||||
a. reproduce and Share the Licensed Material, in whole or
|
||||
in part; and
|
||||
|
||||
b. produce, reproduce, and Share Adapted Material.
|
||||
|
||||
2. Exceptions and Limitations. For the avoidance of doubt, where
|
||||
Exceptions and Limitations apply to Your use, this Public
|
||||
License does not apply, and You do not need to comply with
|
||||
its terms and conditions.
|
||||
|
||||
3. Term. The term of this Public License is specified in Section
|
||||
6(a).
|
||||
|
||||
4. Media and formats; technical modifications allowed. The
|
||||
Licensor authorizes You to exercise the Licensed Rights in
|
||||
all media and formats whether now known or hereafter created,
|
||||
and to make technical modifications necessary to do so. The
|
||||
Licensor waives and/or agrees not to assert any right or
|
||||
authority to forbid You from making technical modifications
|
||||
necessary to exercise the Licensed Rights, including
|
||||
technical modifications necessary to circumvent Effective
|
||||
Technological Measures. For purposes of this Public License,
|
||||
simply making modifications authorized by this Section 2(a)
|
||||
(4) never produces Adapted Material.
|
||||
|
||||
5. Downstream recipients.
|
||||
|
||||
a. Offer from the Licensor -- Licensed Material. Every
|
||||
recipient of the Licensed Material automatically
|
||||
receives an offer from the Licensor to exercise the
|
||||
Licensed Rights under the terms and conditions of this
|
||||
Public License.
|
||||
|
||||
b. Additional offer from the Licensor -- Adapted Material.
|
||||
Every recipient of Adapted Material from You
|
||||
automatically receives an offer from the Licensor to
|
||||
exercise the Licensed Rights in the Adapted Material
|
||||
under the conditions of the Adapter's License You apply.
|
||||
|
||||
c. No downstream restrictions. You may not offer or impose
|
||||
any additional or different terms or conditions on, or
|
||||
apply any Effective Technological Measures to, the
|
||||
Licensed Material if doing so restricts exercise of the
|
||||
Licensed Rights by any recipient of the Licensed
|
||||
Material.
|
||||
|
||||
6. No endorsement. Nothing in this Public License constitutes or
|
||||
may be construed as permission to assert or imply that You
|
||||
are, or that Your use of the Licensed Material is, connected
|
||||
with, or sponsored, endorsed, or granted official status by,
|
||||
the Licensor or others designated to receive attribution as
|
||||
provided in Section 3(a)(1)(A)(i).
|
||||
|
||||
b. Other rights.
|
||||
|
||||
1. Moral rights, such as the right of integrity, are not
|
||||
licensed under this Public License, nor are publicity,
|
||||
privacy, and/or other similar personality rights; however, to
|
||||
the extent possible, the Licensor waives and/or agrees not to
|
||||
assert any such rights held by the Licensor to the limited
|
||||
extent necessary to allow You to exercise the Licensed
|
||||
Rights, but not otherwise.
|
||||
|
||||
2. Patent and trademark rights are not licensed under this
|
||||
Public License.
|
||||
|
||||
3. To the extent possible, the Licensor waives any right to
|
||||
collect royalties from You for the exercise of the Licensed
|
||||
Rights, whether directly or through a collecting society
|
||||
under any voluntary or waivable statutory or compulsory
|
||||
licensing scheme. In all other cases the Licensor expressly
|
||||
reserves any right to collect such royalties.
|
||||
|
||||
|
||||
Section 3 -- License Conditions.
|
||||
|
||||
Your exercise of the Licensed Rights is expressly made subject to the
|
||||
following conditions.
|
||||
|
||||
a. Attribution.
|
||||
|
||||
1. If You Share the Licensed Material (including in modified
|
||||
form), You must:
|
||||
|
||||
a. retain the following if it is supplied by the Licensor
|
||||
with the Licensed Material:
|
||||
|
||||
i. identification of the creator(s) of the Licensed
|
||||
Material and any others designated to receive
|
||||
attribution, in any reasonable manner requested by
|
||||
the Licensor (including by pseudonym if
|
||||
designated);
|
||||
|
||||
ii. a copyright notice;
|
||||
|
||||
iii. a notice that refers to this Public License;
|
||||
|
||||
iv. a notice that refers to the disclaimer of
|
||||
warranties;
|
||||
|
||||
v. a URI or hyperlink to the Licensed Material to the
|
||||
extent reasonably practicable;
|
||||
|
||||
b. indicate if You modified the Licensed Material and
|
||||
retain an indication of any previous modifications; and
|
||||
|
||||
c. indicate the Licensed Material is licensed under this
|
||||
Public License, and include the text of, or the URI or
|
||||
hyperlink to, this Public License.
|
||||
|
||||
2. You may satisfy the conditions in Section 3(a)(1) in any
|
||||
reasonable manner based on the medium, means, and context in
|
||||
which You Share the Licensed Material. For example, it may be
|
||||
reasonable to satisfy the conditions by providing a URI or
|
||||
hyperlink to a resource that includes the required
|
||||
information.
|
||||
|
||||
3. If requested by the Licensor, You must remove any of the
|
||||
information required by Section 3(a)(1)(A) to the extent
|
||||
reasonably practicable.
|
||||
|
||||
b. ShareAlike.
|
||||
|
||||
In addition to the conditions in Section 3(a), if You Share
|
||||
Adapted Material You produce, the following conditions also apply.
|
||||
|
||||
1. The Adapter's License You apply must be a Creative Commons
|
||||
license with the same License Elements, this version or
|
||||
later, or a BY-SA Compatible License.
|
||||
|
||||
2. You must include the text of, or the URI or hyperlink to, the
|
||||
Adapter's License You apply. You may satisfy this condition
|
||||
in any reasonable manner based on the medium, means, and
|
||||
context in which You Share Adapted Material.
|
||||
|
||||
3. You may not offer or impose any additional or different terms
|
||||
or conditions on, or apply any Effective Technological
|
||||
Measures to, Adapted Material that restrict exercise of the
|
||||
rights granted under the Adapter's License You apply.
|
||||
|
||||
|
||||
Section 4 -- Sui Generis Database Rights.
|
||||
|
||||
Where the Licensed Rights include Sui Generis Database Rights that
|
||||
apply to Your use of the Licensed Material:
|
||||
|
||||
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
||||
to extract, reuse, reproduce, and Share all or a substantial
|
||||
portion of the contents of the database;
|
||||
|
||||
b. if You include all or a substantial portion of the database
|
||||
contents in a database in which You have Sui Generis Database
|
||||
Rights, then the database in which You have Sui Generis Database
|
||||
Rights (but not its individual contents) is Adapted Material,
|
||||
|
||||
including for purposes of Section 3(b); and
|
||||
c. You must comply with the conditions in Section 3(a) if You Share
|
||||
all or a substantial portion of the contents of the database.
|
||||
|
||||
For the avoidance of doubt, this Section 4 supplements and does not
|
||||
replace Your obligations under this Public License where the Licensed
|
||||
Rights include other Copyright and Similar Rights.
|
||||
|
||||
|
||||
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
||||
|
||||
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
||||
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
||||
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
||||
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
||||
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
||||
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
||||
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
||||
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
||||
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
||||
|
||||
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
||||
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
||||
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
||||
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
||||
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
||||
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
||||
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
||||
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
||||
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
||||
|
||||
c. The disclaimer of warranties and limitation of liability provided
|
||||
above shall be interpreted in a manner that, to the extent
|
||||
possible, most closely approximates an absolute disclaimer and
|
||||
waiver of all liability.
|
||||
|
||||
|
||||
Section 6 -- Term and Termination.
|
||||
|
||||
a. This Public License applies for the term of the Copyright and
|
||||
Similar Rights licensed here. However, if You fail to comply with
|
||||
this Public License, then Your rights under this Public License
|
||||
terminate automatically.
|
||||
|
||||
b. Where Your right to use the Licensed Material has terminated under
|
||||
Section 6(a), it reinstates:
|
||||
|
||||
1. automatically as of the date the violation is cured, provided
|
||||
it is cured within 30 days of Your discovery of the
|
||||
violation; or
|
||||
|
||||
2. upon express reinstatement by the Licensor.
|
||||
|
||||
For the avoidance of doubt, this Section 6(b) does not affect any
|
||||
right the Licensor may have to seek remedies for Your violations
|
||||
of this Public License.
|
||||
|
||||
c. For the avoidance of doubt, the Licensor may also offer the
|
||||
Licensed Material under separate terms or conditions or stop
|
||||
distributing the Licensed Material at any time; however, doing so
|
||||
will not terminate this Public License.
|
||||
|
||||
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
||||
License.
|
||||
|
||||
|
||||
Section 7 -- Other Terms and Conditions.
|
||||
|
||||
a. The Licensor shall not be bound by any additional or different
|
||||
terms or conditions communicated by You unless expressly agreed.
|
||||
|
||||
b. Any arrangements, understandings, or agreements regarding the
|
||||
Licensed Material not stated herein are separate from and
|
||||
independent of the terms and conditions of this Public License.
|
||||
|
||||
|
||||
Section 8 -- Interpretation.
|
||||
|
||||
a. For the avoidance of doubt, this Public License does not, and
|
||||
shall not be interpreted to, reduce, limit, restrict, or impose
|
||||
conditions on any use of the Licensed Material that could lawfully
|
||||
be made without permission under this Public License.
|
||||
|
||||
b. To the extent possible, if any provision of this Public License is
|
||||
deemed unenforceable, it shall be automatically reformed to the
|
||||
minimum extent necessary to make it enforceable. If the provision
|
||||
cannot be reformed, it shall be severed from this Public License
|
||||
without affecting the enforceability of the remaining terms and
|
||||
conditions.
|
||||
|
||||
c. No term or condition of this Public License will be waived and no
|
||||
failure to comply consented to unless expressly agreed to by the
|
||||
Licensor.
|
||||
|
||||
d. Nothing in this Public License constitutes or may be interpreted
|
||||
as a limitation upon, or waiver of, any privileges and immunities
|
||||
that apply to the Licensor or You, including from the legal
|
||||
processes of any jurisdiction or authority.
|
||||
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons is not a party to its public licenses.
|
||||
Notwithstanding, Creative Commons may elect to apply one of its public
|
||||
licenses to material it publishes and in those instances will be
|
||||
considered the "Licensor." Except for the limited purpose of indicating
|
||||
that material is shared under a Creative Commons public license or as
|
||||
otherwise permitted by the Creative Commons policies published at
|
||||
creativecommons.org/policies, Creative Commons does not authorize the
|
||||
use of the trademark "Creative Commons" or any other trademark or logo
|
||||
of Creative Commons without its prior written consent including,
|
||||
without limitation, in connection with any unauthorized modifications
|
||||
to any of its public licenses or any other arrangements,
|
||||
understandings, or agreements concerning use of licensed material. For
|
||||
the avoidance of doubt, this paragraph does not form part of the public
|
||||
licenses.
|
||||
|
||||
Creative Commons may be contacted at creativecommons.org.
|
5
e2e/vendor/github.com/opencontainers/go-digest/MAINTAINERS
generated
vendored
Normal file
5
e2e/vendor/github.com/opencontainers/go-digest/MAINTAINERS
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
Derek McGowan <derek@mcgstyle.net> (@dmcgowan)
|
||||
Stephen Day <stevvooe@gmail.com> (@stevvooe)
|
||||
Vincent Batts <vbatts@hashbangbash.com> (@vbatts)
|
||||
Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> (@AkihiroSuda)
|
||||
Sebastiaan van Stijn <github@gone.nl> (@thaJeztah)
|
96
e2e/vendor/github.com/opencontainers/go-digest/README.md
generated
vendored
Normal file
96
e2e/vendor/github.com/opencontainers/go-digest/README.md
generated
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
# go-digest
|
||||
|
||||
[](https://godoc.org/github.com/opencontainers/go-digest) [](https://goreportcard.com/report/github.com/opencontainers/go-digest) [](https://travis-ci.org/opencontainers/go-digest)
|
||||
|
||||
Common digest package used across the container ecosystem.
|
||||
|
||||
Please see the [godoc](https://godoc.org/github.com/opencontainers/go-digest) for more information.
|
||||
|
||||
# What is a digest?
|
||||
|
||||
A digest is just a [hash](https://en.wikipedia.org/wiki/Hash_function).
|
||||
|
||||
The most common use case for a digest is to create a content identifier for use in [Content Addressable Storage](https://en.wikipedia.org/wiki/Content-addressable_storage) systems:
|
||||
|
||||
```go
|
||||
id := digest.FromBytes([]byte("my content"))
|
||||
```
|
||||
|
||||
In the example above, the id can be used to uniquely identify the byte slice "my content".
|
||||
This allows two disparate applications to agree on a verifiable identifier without having to trust one another.
|
||||
|
||||
An identifying digest can be verified, as follows:
|
||||
|
||||
```go
|
||||
if id != digest.FromBytes([]byte("my content")) {
|
||||
return errors.New("the content has changed!")
|
||||
}
|
||||
```
|
||||
|
||||
A `Verifier` type can be used to handle cases where an `io.Reader` makes more sense:
|
||||
|
||||
```go
|
||||
rd := getContent()
|
||||
verifier := id.Verifier()
|
||||
io.Copy(verifier, rd)
|
||||
|
||||
if !verifier.Verified() {
|
||||
return errors.New("the content has changed!")
|
||||
}
|
||||
```
|
||||
|
||||
Using [Merkle DAGs](https://en.wikipedia.org/wiki/Merkle_tree), this can power a rich, safe, content distribution system.
|
||||
|
||||
# Usage
|
||||
|
||||
While the [godoc](https://godoc.org/github.com/opencontainers/go-digest) is considered the best resource, a few important items need to be called out when using this package.
|
||||
|
||||
1. Make sure to import the hash implementations into your application or the package will panic.
|
||||
You should have something like the following in the main (or other entrypoint) of your application:
|
||||
|
||||
```go
|
||||
import (
|
||||
_ "crypto/sha256"
|
||||
_ "crypto/sha512"
|
||||
)
|
||||
```
|
||||
This may seem inconvenient but it allows you replace the hash
|
||||
implementations with others, such as https://github.com/stevvooe/resumable.
|
||||
|
||||
2. Even though `digest.Digest` may be assemblable as a string, _always_ verify your input with `digest.Parse` or use `Digest.Validate` when accepting untrusted input.
|
||||
While there are measures to avoid common problems, this will ensure you have valid digests in the rest of your application.
|
||||
|
||||
3. While alternative encodings of hash values (digests) are possible (for example, base64), this package deals exclusively with hex-encoded digests.
|
||||
|
||||
# Stability
|
||||
|
||||
The Go API, at this stage, is considered stable, unless otherwise noted.
|
||||
|
||||
As always, before using a package export, read the [godoc](https://godoc.org/github.com/opencontainers/go-digest).
|
||||
|
||||
# Contributing
|
||||
|
||||
This package is considered fairly complete.
|
||||
It has been in production in thousands (millions?) of deployments and is fairly battle-hardened.
|
||||
New additions will be met with skepticism.
|
||||
If you think there is a missing feature, please file a bug clearly describing the problem and the alternatives you tried before submitting a PR.
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
Participation in the OpenContainers community is governed by [OpenContainer's Code of Conduct][code-of-conduct].
|
||||
|
||||
## Security
|
||||
|
||||
If you find an issue, please follow the [security][security] protocol to report it.
|
||||
|
||||
# Copyright and license
|
||||
|
||||
Copyright © 2019, 2020 OCI Contributors
|
||||
Copyright © 2016 Docker, Inc.
|
||||
All rights reserved, except as follows.
|
||||
Code is released under the [Apache 2.0 license](LICENSE).
|
||||
This `README.md` file and the [`CONTRIBUTING.md`](CONTRIBUTING.md) file are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file [`LICENSE.docs`](LICENSE.docs).
|
||||
You may obtain a duplicate copy of the same license, titled CC BY-SA 4.0, at http://creativecommons.org/licenses/by-sa/4.0/.
|
||||
|
||||
[security]: https://github.com/opencontainers/org/blob/master/security
|
||||
[code-of-conduct]: https://github.com/opencontainers/org/blob/master/CODE_OF_CONDUCT.md
|
193
e2e/vendor/github.com/opencontainers/go-digest/algorithm.go
generated
vendored
Normal file
193
e2e/vendor/github.com/opencontainers/go-digest/algorithm.go
generated
vendored
Normal file
@ -0,0 +1,193 @@
|
||||
// Copyright 2019, 2020 OCI Contributors
|
||||
// Copyright 2017 Docker, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package digest
|
||||
|
||||
import (
|
||||
"crypto"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// Algorithm identifies and implementation of a digester by an identifier.
|
||||
// Note the that this defines both the hash algorithm used and the string
|
||||
// encoding.
|
||||
type Algorithm string
|
||||
|
||||
// supported digest types
|
||||
const (
|
||||
SHA256 Algorithm = "sha256" // sha256 with hex encoding (lower case only)
|
||||
SHA384 Algorithm = "sha384" // sha384 with hex encoding (lower case only)
|
||||
SHA512 Algorithm = "sha512" // sha512 with hex encoding (lower case only)
|
||||
|
||||
// Canonical is the primary digest algorithm used with the distribution
|
||||
// project. Other digests may be used but this one is the primary storage
|
||||
// digest.
|
||||
Canonical = SHA256
|
||||
)
|
||||
|
||||
var (
|
||||
// TODO(stevvooe): Follow the pattern of the standard crypto package for
|
||||
// registration of digests. Effectively, we are a registerable set and
|
||||
// common symbol access.
|
||||
|
||||
// algorithms maps values to hash.Hash implementations. Other algorithms
|
||||
// may be available but they cannot be calculated by the digest package.
|
||||
algorithms = map[Algorithm]crypto.Hash{
|
||||
SHA256: crypto.SHA256,
|
||||
SHA384: crypto.SHA384,
|
||||
SHA512: crypto.SHA512,
|
||||
}
|
||||
|
||||
// anchoredEncodedRegexps contains anchored regular expressions for hex-encoded digests.
|
||||
// Note that /A-F/ disallowed.
|
||||
anchoredEncodedRegexps = map[Algorithm]*regexp.Regexp{
|
||||
SHA256: regexp.MustCompile(`^[a-f0-9]{64}$`),
|
||||
SHA384: regexp.MustCompile(`^[a-f0-9]{96}$`),
|
||||
SHA512: regexp.MustCompile(`^[a-f0-9]{128}$`),
|
||||
}
|
||||
)
|
||||
|
||||
// Available returns true if the digest type is available for use. If this
|
||||
// returns false, Digester and Hash will return nil.
|
||||
func (a Algorithm) Available() bool {
|
||||
h, ok := algorithms[a]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// check availability of the hash, as well
|
||||
return h.Available()
|
||||
}
|
||||
|
||||
func (a Algorithm) String() string {
|
||||
return string(a)
|
||||
}
|
||||
|
||||
// Size returns number of bytes returned by the hash.
|
||||
func (a Algorithm) Size() int {
|
||||
h, ok := algorithms[a]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
return h.Size()
|
||||
}
|
||||
|
||||
// Set implemented to allow use of Algorithm as a command line flag.
|
||||
func (a *Algorithm) Set(value string) error {
|
||||
if value == "" {
|
||||
*a = Canonical
|
||||
} else {
|
||||
// just do a type conversion, support is queried with Available.
|
||||
*a = Algorithm(value)
|
||||
}
|
||||
|
||||
if !a.Available() {
|
||||
return ErrDigestUnsupported
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Digester returns a new digester for the specified algorithm. If the algorithm
|
||||
// does not have a digester implementation, nil will be returned. This can be
|
||||
// checked by calling Available before calling Digester.
|
||||
func (a Algorithm) Digester() Digester {
|
||||
return &digester{
|
||||
alg: a,
|
||||
hash: a.Hash(),
|
||||
}
|
||||
}
|
||||
|
||||
// Hash returns a new hash as used by the algorithm. If not available, the
|
||||
// method will panic. Check Algorithm.Available() before calling.
|
||||
func (a Algorithm) Hash() hash.Hash {
|
||||
if !a.Available() {
|
||||
// Empty algorithm string is invalid
|
||||
if a == "" {
|
||||
panic(fmt.Sprintf("empty digest algorithm, validate before calling Algorithm.Hash()"))
|
||||
}
|
||||
|
||||
// NOTE(stevvooe): A missing hash is usually a programming error that
|
||||
// must be resolved at compile time. We don't import in the digest
|
||||
// package to allow users to choose their hash implementation (such as
|
||||
// when using stevvooe/resumable or a hardware accelerated package).
|
||||
//
|
||||
// Applications that may want to resolve the hash at runtime should
|
||||
// call Algorithm.Available before call Algorithm.Hash().
|
||||
panic(fmt.Sprintf("%v not available (make sure it is imported)", a))
|
||||
}
|
||||
|
||||
return algorithms[a].New()
|
||||
}
|
||||
|
||||
// Encode encodes the raw bytes of a digest, typically from a hash.Hash, into
|
||||
// the encoded portion of the digest.
|
||||
func (a Algorithm) Encode(d []byte) string {
|
||||
// TODO(stevvooe): Currently, all algorithms use a hex encoding. When we
|
||||
// add support for back registration, we can modify this accordingly.
|
||||
return fmt.Sprintf("%x", d)
|
||||
}
|
||||
|
||||
// FromReader returns the digest of the reader using the algorithm.
|
||||
func (a Algorithm) FromReader(rd io.Reader) (Digest, error) {
|
||||
digester := a.Digester()
|
||||
|
||||
if _, err := io.Copy(digester.Hash(), rd); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return digester.Digest(), nil
|
||||
}
|
||||
|
||||
// FromBytes digests the input and returns a Digest.
|
||||
func (a Algorithm) FromBytes(p []byte) Digest {
|
||||
digester := a.Digester()
|
||||
|
||||
if _, err := digester.Hash().Write(p); err != nil {
|
||||
// Writes to a Hash should never fail. None of the existing
|
||||
// hash implementations in the stdlib or hashes vendored
|
||||
// here can return errors from Write. Having a panic in this
|
||||
// condition instead of having FromBytes return an error value
|
||||
// avoids unnecessary error handling paths in all callers.
|
||||
panic("write to hash function returned error: " + err.Error())
|
||||
}
|
||||
|
||||
return digester.Digest()
|
||||
}
|
||||
|
||||
// FromString digests the string input and returns a Digest.
|
||||
func (a Algorithm) FromString(s string) Digest {
|
||||
return a.FromBytes([]byte(s))
|
||||
}
|
||||
|
||||
// Validate validates the encoded portion string
|
||||
func (a Algorithm) Validate(encoded string) error {
|
||||
r, ok := anchoredEncodedRegexps[a]
|
||||
if !ok {
|
||||
return ErrDigestUnsupported
|
||||
}
|
||||
// Digests much always be hex-encoded, ensuring that their hex portion will
|
||||
// always be size*2
|
||||
if a.Size()*2 != len(encoded) {
|
||||
return ErrDigestInvalidLength
|
||||
}
|
||||
if r.MatchString(encoded) {
|
||||
return nil
|
||||
}
|
||||
return ErrDigestInvalidFormat
|
||||
}
|
157
e2e/vendor/github.com/opencontainers/go-digest/digest.go
generated
vendored
Normal file
157
e2e/vendor/github.com/opencontainers/go-digest/digest.go
generated
vendored
Normal file
@ -0,0 +1,157 @@
|
||||
// Copyright 2019, 2020 OCI Contributors
|
||||
// Copyright 2017 Docker, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package digest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Digest allows simple protection of hex formatted digest strings, prefixed
|
||||
// by their algorithm. Strings of type Digest have some guarantee of being in
|
||||
// the correct format and it provides quick access to the components of a
|
||||
// digest string.
|
||||
//
|
||||
// The following is an example of the contents of Digest types:
|
||||
//
|
||||
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
|
||||
//
|
||||
// This allows to abstract the digest behind this type and work only in those
|
||||
// terms.
|
||||
type Digest string
|
||||
|
||||
// NewDigest returns a Digest from alg and a hash.Hash object.
|
||||
func NewDigest(alg Algorithm, h hash.Hash) Digest {
|
||||
return NewDigestFromBytes(alg, h.Sum(nil))
|
||||
}
|
||||
|
||||
// NewDigestFromBytes returns a new digest from the byte contents of p.
|
||||
// Typically, this can come from hash.Hash.Sum(...) or xxx.SumXXX(...)
|
||||
// functions. This is also useful for rebuilding digests from binary
|
||||
// serializations.
|
||||
func NewDigestFromBytes(alg Algorithm, p []byte) Digest {
|
||||
return NewDigestFromEncoded(alg, alg.Encode(p))
|
||||
}
|
||||
|
||||
// NewDigestFromHex is deprecated. Please use NewDigestFromEncoded.
|
||||
func NewDigestFromHex(alg, hex string) Digest {
|
||||
return NewDigestFromEncoded(Algorithm(alg), hex)
|
||||
}
|
||||
|
||||
// NewDigestFromEncoded returns a Digest from alg and the encoded digest.
|
||||
func NewDigestFromEncoded(alg Algorithm, encoded string) Digest {
|
||||
return Digest(fmt.Sprintf("%s:%s", alg, encoded))
|
||||
}
|
||||
|
||||
// DigestRegexp matches valid digest types.
|
||||
var DigestRegexp = regexp.MustCompile(`[a-z0-9]+(?:[.+_-][a-z0-9]+)*:[a-zA-Z0-9=_-]+`)
|
||||
|
||||
// DigestRegexpAnchored matches valid digest types, anchored to the start and end of the match.
|
||||
var DigestRegexpAnchored = regexp.MustCompile(`^` + DigestRegexp.String() + `$`)
|
||||
|
||||
var (
|
||||
// ErrDigestInvalidFormat returned when digest format invalid.
|
||||
ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format")
|
||||
|
||||
// ErrDigestInvalidLength returned when digest has invalid length.
|
||||
ErrDigestInvalidLength = fmt.Errorf("invalid checksum digest length")
|
||||
|
||||
// ErrDigestUnsupported returned when the digest algorithm is unsupported.
|
||||
ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm")
|
||||
)
|
||||
|
||||
// Parse parses s and returns the validated digest object. An error will
|
||||
// be returned if the format is invalid.
|
||||
func Parse(s string) (Digest, error) {
|
||||
d := Digest(s)
|
||||
return d, d.Validate()
|
||||
}
|
||||
|
||||
// FromReader consumes the content of rd until io.EOF, returning canonical digest.
|
||||
func FromReader(rd io.Reader) (Digest, error) {
|
||||
return Canonical.FromReader(rd)
|
||||
}
|
||||
|
||||
// FromBytes digests the input and returns a Digest.
|
||||
func FromBytes(p []byte) Digest {
|
||||
return Canonical.FromBytes(p)
|
||||
}
|
||||
|
||||
// FromString digests the input and returns a Digest.
|
||||
func FromString(s string) Digest {
|
||||
return Canonical.FromString(s)
|
||||
}
|
||||
|
||||
// Validate checks that the contents of d is a valid digest, returning an
|
||||
// error if not.
|
||||
func (d Digest) Validate() error {
|
||||
s := string(d)
|
||||
i := strings.Index(s, ":")
|
||||
if i <= 0 || i+1 == len(s) {
|
||||
return ErrDigestInvalidFormat
|
||||
}
|
||||
algorithm, encoded := Algorithm(s[:i]), s[i+1:]
|
||||
if !algorithm.Available() {
|
||||
if !DigestRegexpAnchored.MatchString(s) {
|
||||
return ErrDigestInvalidFormat
|
||||
}
|
||||
return ErrDigestUnsupported
|
||||
}
|
||||
return algorithm.Validate(encoded)
|
||||
}
|
||||
|
||||
// Algorithm returns the algorithm portion of the digest. This will panic if
|
||||
// the underlying digest is not in a valid format.
|
||||
func (d Digest) Algorithm() Algorithm {
|
||||
return Algorithm(d[:d.sepIndex()])
|
||||
}
|
||||
|
||||
// Verifier returns a writer object that can be used to verify a stream of
|
||||
// content against the digest. If the digest is invalid, the method will panic.
|
||||
func (d Digest) Verifier() Verifier {
|
||||
return hashVerifier{
|
||||
hash: d.Algorithm().Hash(),
|
||||
digest: d,
|
||||
}
|
||||
}
|
||||
|
||||
// Encoded returns the encoded portion of the digest. This will panic if the
|
||||
// underlying digest is not in a valid format.
|
||||
func (d Digest) Encoded() string {
|
||||
return string(d[d.sepIndex()+1:])
|
||||
}
|
||||
|
||||
// Hex is deprecated. Please use Digest.Encoded.
|
||||
func (d Digest) Hex() string {
|
||||
return d.Encoded()
|
||||
}
|
||||
|
||||
func (d Digest) String() string {
|
||||
return string(d)
|
||||
}
|
||||
|
||||
func (d Digest) sepIndex() int {
|
||||
i := strings.Index(string(d), ":")
|
||||
|
||||
if i < 0 {
|
||||
panic(fmt.Sprintf("no ':' separator in digest %q", d))
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
40
e2e/vendor/github.com/opencontainers/go-digest/digester.go
generated
vendored
Normal file
40
e2e/vendor/github.com/opencontainers/go-digest/digester.go
generated
vendored
Normal file
@ -0,0 +1,40 @@
|
||||
// Copyright 2019, 2020 OCI Contributors
|
||||
// Copyright 2017 Docker, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package digest
|
||||
|
||||
import "hash"
|
||||
|
||||
// Digester calculates the digest of written data. Writes should go directly
|
||||
// to the return value of Hash, while calling Digest will return the current
|
||||
// value of the digest.
|
||||
type Digester interface {
|
||||
Hash() hash.Hash // provides direct access to underlying hash instance.
|
||||
Digest() Digest
|
||||
}
|
||||
|
||||
// digester provides a simple digester definition that embeds a hasher.
|
||||
type digester struct {
|
||||
alg Algorithm
|
||||
hash hash.Hash
|
||||
}
|
||||
|
||||
func (d *digester) Hash() hash.Hash {
|
||||
return d.hash
|
||||
}
|
||||
|
||||
func (d *digester) Digest() Digest {
|
||||
return NewDigest(d.alg, d.hash)
|
||||
}
|
62
e2e/vendor/github.com/opencontainers/go-digest/doc.go
generated
vendored
Normal file
62
e2e/vendor/github.com/opencontainers/go-digest/doc.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright 2019, 2020 OCI Contributors
|
||||
// Copyright 2017 Docker, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package digest provides a generalized type to opaquely represent message
|
||||
// digests and their operations within the registry. The Digest type is
|
||||
// designed to serve as a flexible identifier in a content-addressable system.
|
||||
// More importantly, it provides tools and wrappers to work with
|
||||
// hash.Hash-based digests with little effort.
|
||||
//
|
||||
// Basics
|
||||
//
|
||||
// The format of a digest is simply a string with two parts, dubbed the
|
||||
// "algorithm" and the "digest", separated by a colon:
|
||||
//
|
||||
// <algorithm>:<digest>
|
||||
//
|
||||
// An example of a sha256 digest representation follows:
|
||||
//
|
||||
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
|
||||
//
|
||||
// The "algorithm" portion defines both the hashing algorithm used to calculate
|
||||
// the digest and the encoding of the resulting digest, which defaults to "hex"
|
||||
// if not otherwise specified. Currently, all supported algorithms have their
|
||||
// digests encoded in hex strings.
|
||||
//
|
||||
// In the example above, the string "sha256" is the algorithm and the hex bytes
|
||||
// are the "digest".
|
||||
//
|
||||
// Because the Digest type is simply a string, once a valid Digest is
|
||||
// obtained, comparisons are cheap, quick and simple to express with the
|
||||
// standard equality operator.
|
||||
//
|
||||
// Verification
|
||||
//
|
||||
// The main benefit of using the Digest type is simple verification against a
|
||||
// given digest. The Verifier interface, modeled after the stdlib hash.Hash
|
||||
// interface, provides a common write sink for digest verification. After
|
||||
// writing is complete, calling the Verifier.Verified method will indicate
|
||||
// whether or not the stream of bytes matches the target digest.
|
||||
//
|
||||
// Missing Features
|
||||
//
|
||||
// In addition to the above, we intend to add the following features to this
|
||||
// package:
|
||||
//
|
||||
// 1. A Digester type that supports write sink digest calculation.
|
||||
//
|
||||
// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry.
|
||||
//
|
||||
package digest
|
46
e2e/vendor/github.com/opencontainers/go-digest/verifiers.go
generated
vendored
Normal file
46
e2e/vendor/github.com/opencontainers/go-digest/verifiers.go
generated
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
// Copyright 2019, 2020 OCI Contributors
|
||||
// Copyright 2017 Docker, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package digest
|
||||
|
||||
import (
|
||||
"hash"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Verifier presents a general verification interface to be used with message
|
||||
// digests and other byte stream verifications. Users instantiate a Verifier
|
||||
// from one of the various methods, write the data under test to it then check
|
||||
// the result with the Verified method.
|
||||
type Verifier interface {
|
||||
io.Writer
|
||||
|
||||
// Verified will return true if the content written to Verifier matches
|
||||
// the digest.
|
||||
Verified() bool
|
||||
}
|
||||
|
||||
type hashVerifier struct {
|
||||
digest Digest
|
||||
hash hash.Hash
|
||||
}
|
||||
|
||||
func (hv hashVerifier) Write(p []byte) (n int, err error) {
|
||||
return hv.hash.Write(p)
|
||||
}
|
||||
|
||||
func (hv hashVerifier) Verified() bool {
|
||||
return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash)
|
||||
}
|
191
e2e/vendor/github.com/opencontainers/runc/LICENSE
generated
vendored
Normal file
191
e2e/vendor/github.com/opencontainers/runc/LICENSE
generated
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2014 Docker, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
17
e2e/vendor/github.com/opencontainers/runc/NOTICE
generated
vendored
Normal file
17
e2e/vendor/github.com/opencontainers/runc/NOTICE
generated
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
runc
|
||||
|
||||
Copyright 2012-2015 Docker, Inc.
|
||||
|
||||
This product includes software developed at Docker, Inc. (http://www.docker.com).
|
||||
|
||||
The following is courtesy of our legal counsel:
|
||||
|
||||
|
||||
Use and transfer of Docker may be subject to certain restrictions by the
|
||||
United States and other governments.
|
||||
It is your responsibility to ensure that your use and/or transfer does not
|
||||
violate applicable laws.
|
||||
|
||||
For more information, please see http://www.bis.doc.gov
|
||||
|
||||
See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.
|
80
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
80
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrDevicesUnsupported is an error returned when a cgroup manager
|
||||
// is not configured to set device rules.
|
||||
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
|
||||
|
||||
// ErrRootless is returned by [Manager.Apply] when there is an error
|
||||
// creating cgroup directory, and cgroup.Rootless is set. In general,
|
||||
// this error is to be ignored.
|
||||
ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")
|
||||
|
||||
// DevicesSetV1 and DevicesSetV2 are functions to set devices for
|
||||
// cgroup v1 and v2, respectively. Unless
|
||||
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
|
||||
// package is imported, it is set to nil, so cgroup managers can't
|
||||
// manage devices.
|
||||
DevicesSetV1 func(path string, r *configs.Resources) error
|
||||
DevicesSetV2 func(path string, r *configs.Resources) error
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Apply creates a cgroup, if not yet created, and adds a process
|
||||
// with the specified pid into that cgroup. A special value of -1
|
||||
// can be used to merely create a cgroup.
|
||||
Apply(pid int) error
|
||||
|
||||
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||
// any all its sub-cgroups.
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// GetStats returns cgroups statistics.
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Freeze sets the freezer cgroup to the specified state.
|
||||
Freeze(state configs.FreezerState) error
|
||||
|
||||
// Destroy removes cgroup.
|
||||
Destroy() error
|
||||
|
||||
// Path returns a cgroup path to the specified controller/subsystem.
|
||||
// For cgroupv2, the argument is unused and can be empty.
|
||||
Path(string) string
|
||||
|
||||
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||
// the resources specified during Manager creation (or the previous call
|
||||
// to Set) are used.
|
||||
Set(r *configs.Resources) error
|
||||
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||
// restore later.
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||
// path to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||
// unified path.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
|
||||
// Exists returns whether the cgroup path exists or not.
|
||||
Exists() bool
|
||||
|
||||
// OOMKillCount reports OOM kill count for the cgroup.
|
||||
OOMKillCount() (uint64, error)
|
||||
}
|
216
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
216
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
@ -0,0 +1,216 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||
// It is supposed to be used for cgroup files only, and returns
|
||||
// an error if the file is not a cgroup file.
|
||||
//
|
||||
// Arguments dir and file are joined together to form an absolute path
|
||||
// to a file being opened.
|
||||
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
return openFile(dir, file, flags)
|
||||
}
|
||||
|
||||
// ReadFile reads data from a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fd.Close()
|
||||
var buf bytes.Buffer
|
||||
|
||||
_, err = buf.ReadFrom(fd)
|
||||
return buf.String(), err
|
||||
}
|
||||
|
||||
// WriteFile writes data to a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func WriteFile(dir, file, data string) error {
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
if _, err := fd.WriteString(data); err != nil {
|
||||
// Having data in the error message helps in debugging.
|
||||
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteFileByLine is the same as WriteFile, except if data contains newlines,
|
||||
// it is written line by line.
|
||||
func WriteFileByLine(dir, file, data string) error {
|
||||
i := strings.Index(data, "\n")
|
||||
if i == -1 {
|
||||
return WriteFile(dir, file, data)
|
||||
}
|
||||
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
start := 0
|
||||
for {
|
||||
var line string
|
||||
if i == -1 {
|
||||
line = data[start:]
|
||||
} else {
|
||||
line = data[start : start+i+1]
|
||||
}
|
||||
_, err := fd.WriteString(line)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %q: %w", line, err)
|
||||
}
|
||||
if i == -1 {
|
||||
break
|
||||
}
|
||||
start += i + 1
|
||||
i = strings.Index(data[start:], "\n")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
cgroupfsDir = "/sys/fs/cgroup"
|
||||
cgroupfsPrefix = cgroupfsDir + "/"
|
||||
)
|
||||
|
||||
var (
|
||||
// TestMode is set to true by unit tests that need "fake" cgroupfs.
|
||||
TestMode bool
|
||||
|
||||
cgroupRootHandle *os.File
|
||||
prepOnce sync.Once
|
||||
prepErr error
|
||||
resolveFlags uint64
|
||||
)
|
||||
|
||||
func prepareOpenat2() error {
|
||||
prepOnce.Do(func() {
|
||||
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
|
||||
})
|
||||
if err != nil {
|
||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||
if err != unix.ENOSYS {
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
} else {
|
||||
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||
}
|
||||
return
|
||||
}
|
||||
file := os.NewFile(uintptr(fd), cgroupfsDir)
|
||||
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
|
||||
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
return
|
||||
}
|
||||
|
||||
cgroupRootHandle = file
|
||||
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||
}
|
||||
})
|
||||
|
||||
return prepErr
|
||||
}
|
||||
|
||||
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||
mode := os.FileMode(0)
|
||||
if TestMode && flags&os.O_WRONLY != 0 {
|
||||
// "emulate" cgroup fs for unit tests
|
||||
flags |= os.O_TRUNC | os.O_CREATE
|
||||
mode = 0o600
|
||||
}
|
||||
path := path.Join(dir, utils.CleanPath(file))
|
||||
if prepareOpenat2() != nil {
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||
if len(relPath) == len(path) { // non-standard path, old system?
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
|
||||
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
|
||||
&unix.OpenHow{
|
||||
Resolve: resolveFlags,
|
||||
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||
Mode: uint64(mode),
|
||||
})
|
||||
if err != nil {
|
||||
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||
// Check if cgroupRootHandle is still opened to cgroupfsDir
|
||||
// (happens when this package is incorrectly used
|
||||
// across the chroot/pivot_root/mntns boundary, or
|
||||
// when /sys/fs/cgroup is remounted).
|
||||
//
|
||||
// TODO: if such usage will ever be common, amend this
|
||||
// to reopen cgroupRootHandle and retry openat2.
|
||||
fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
|
||||
defer closer()
|
||||
fdDest, _ := os.Readlink(fdPath)
|
||||
if fdDest != cgroupfsDir {
|
||||
// Wrap the error so it is clear that cgroupRootHandle
|
||||
// is opened to an unexpected/wrong directory.
|
||||
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
|
||||
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return os.NewFile(uintptr(fd), path), nil
|
||||
}
|
||||
|
||||
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||
|
||||
// Can be changed by unit tests.
|
||||
var openFallback = openAndCheck
|
||||
|
||||
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if TestMode {
|
||||
return fd, nil
|
||||
}
|
||||
// Check this is a cgroupfs file.
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||
}
|
||||
|
||||
return fd, nil
|
||||
}
|
311
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
311
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
@ -0,0 +1,311 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type BlkioGroup struct {
|
||||
weightFilename string
|
||||
weightDeviceFilename string
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
s.detectWeightFilenames(path)
|
||||
if r.BlkioWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioLeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if wd.Weight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if wd.LeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
blkio.sectors
|
||||
8:0 6792
|
||||
|
||||
blkio.io_service_bytes
|
||||
8:0 Read 1282048
|
||||
8:0 Write 2195456
|
||||
8:0 Sync 2195456
|
||||
8:0 Async 1282048
|
||||
8:0 Total 3477504
|
||||
Total 3477504
|
||||
|
||||
blkio.io_serviced
|
||||
8:0 Read 124
|
||||
8:0 Write 104
|
||||
8:0 Sync 104
|
||||
8:0 Async 124
|
||||
8:0 Total 228
|
||||
Total 228
|
||||
|
||||
blkio.io_queued
|
||||
8:0 Read 0
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 0
|
||||
Total 0
|
||||
*/
|
||||
|
||||
func splitBlkioStatLine(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
}
|
||||
|
||||
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
// format: dev type amount
|
||||
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||
if len(fields) < 3 {
|
||||
if len(fields) == 2 && fields[0] == "Total" {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, malformedLine(dir, file, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
minor := v
|
||||
|
||||
op := ""
|
||||
valueField := 2
|
||||
if len(fields) == 4 {
|
||||
op = fields[2]
|
||||
valueField = 3
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
type blkioStatInfo struct {
|
||||
filename string
|
||||
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
|
||||
}
|
||||
bfqDebugStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
bfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
cfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
throttleRecursiveStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
baseStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
orderedStats := [][]blkioStatInfo{
|
||||
bfqDebugStats,
|
||||
bfqStats,
|
||||
cfqStats,
|
||||
throttleRecursiveStats,
|
||||
baseStats,
|
||||
}
|
||||
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
for _, statGroup := range orderedStats {
|
||||
for i, statInfo := range statGroup {
|
||||
if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
|
||||
// if error occurs on first file, move to next group
|
||||
if i == 0 {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
*statInfo.blkioStatEntriesPtr = blkioStats
|
||||
// finish if all stats are gathered
|
||||
if i == len(statGroup)-1 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||
if s.weightFilename != "" {
|
||||
// Already detected.
|
||||
return
|
||||
}
|
||||
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||
s.weightFilename = "blkio.weight"
|
||||
s.weightDeviceFilename = "blkio.weight_device"
|
||||
} else {
|
||||
s.weightFilename = "blkio.bfq.weight"
|
||||
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||
}
|
||||
}
|
182
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
182
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type CpuGroup struct{}
|
||||
|
||||
func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||
var period string
|
||||
if r.CpuRtPeriod != 0 {
|
||||
period = strconv.FormatUint(r.CpuRtPeriod, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
|
||||
// The values of cpu.rt_period_us and cpu.rt_runtime_us
|
||||
// are inter-dependent and need to be set in a proper order.
|
||||
// If the kernel rejects the new period value with EINVAL
|
||||
// and the new runtime value is also being set, let's
|
||||
// ignore the error for now and retry later.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuRtRuntime == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
period = ""
|
||||
}
|
||||
}
|
||||
if r.CpuRtRuntime != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if period != "" {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpuShares != 0 {
|
||||
shares := r.CpuShares
|
||||
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
// read it back
|
||||
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// ... and check
|
||||
if shares > sharesRead {
|
||||
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||
} else if shares < sharesRead {
|
||||
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||
}
|
||||
}
|
||||
|
||||
var period string
|
||||
if r.CpuPeriod != 0 {
|
||||
period = strconv.FormatUint(r.CpuPeriod, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
// Sometimes when the period to be set is smaller
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_period exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
period = ""
|
||||
}
|
||||
}
|
||||
|
||||
var burst string
|
||||
if r.CpuBurst != nil {
|
||||
burst = strconv.FormatUint(*r.CpuBurst, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
|
||||
if errors.Is(err, unix.ENOENT) {
|
||||
// If CPU burst knob is not available (e.g.
|
||||
// older kernel), ignore it.
|
||||
burst = ""
|
||||
} else {
|
||||
// Sometimes when the burst to be set is larger
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_burst exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
burst = ""
|
||||
}
|
||||
}
|
||||
if r.CpuQuota != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if period != "" {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if burst != "" {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r.CPUIdle != nil {
|
||||
idle := strconv.FormatInt(*r.CPUIdle, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.idle", idle); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return s.SetRtSched(path, r)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_time":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
166
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
166
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
cgroupCpuacctUsageAll = "cpuacct.usage_all"
|
||||
|
||||
nanosecondsInSecond = 1000000000
|
||||
|
||||
userModeColumn = 1
|
||||
kernelModeColumn = 2
|
||||
cuacctUsageAllColumnsNumber = 3
|
||||
|
||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||
// on Linux it's a constant which is safe to be hard coded,
|
||||
// so we can avoid using cgo here. For details, see:
|
||||
// https://github.com/containerd/cgroups/pull/12
|
||||
clockTicks uint64 = 100
|
||||
)
|
||||
|
||||
type CpuacctGroup struct{}
|
||||
|
||||
func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsage, err := getPercpuUsage(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
var userModeUsage, kernelModeUsage uint64
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
file = cgroupCpuacctStat
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
fields := strings.Fields(data)
|
||||
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||
return 0, 0, malformedLine(path, file, data)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
const file = "cpuacct.usage_percpu"
|
||||
percpuUsage := []uint64{}
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
for _, value := range strings.Fields(data) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
||||
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
const file = cgroupCpuacctUsageAll
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
scanner.Scan() // skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||
continue
|
||||
}
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
}
|
245
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
245
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,245 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type CpusetGroup struct{}
|
||||
|
||||
func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
return s.ApplyDir(path, r, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||
var extracted []uint16
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
}
|
||||
if len(fileContent) == 0 {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||
}
|
||||
|
||||
for _, s := range strings.Split(fileContent, ",") {
|
||||
sp := strings.SplitN(s, "-", 3)
|
||||
switch len(sp) {
|
||||
case 3:
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||
case 2:
|
||||
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if min > max {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||
}
|
||||
for i := min; i <= max; i++ {
|
||||
extracted = append(extracted, uint16(i))
|
||||
}
|
||||
case 1:
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
extracted = append(extracted, uint16(value))
|
||||
}
|
||||
}
|
||||
|
||||
return extracted, nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
var err error
|
||||
|
||||
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// cpusetEnsureParent makes sure that the parent directories of current
|
||||
// are created and populated with the proper cpus and mems files copied
|
||||
// from their respective parent. It does that recursively, starting from
|
||||
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||
func cpusetEnsureParent(current string) error {
|
||||
var st unix.Statfs_t
|
||||
|
||||
parent := filepath.Dir(current)
|
||||
err := unix.Statfs(parent, &st)
|
||||
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return nil
|
||||
}
|
||||
// Treat non-existing directory as cgroupfs as it will be created,
|
||||
// and the root cpuset directory obviously exists.
|
||||
if err != nil && err != unix.ENOENT {
|
||||
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||
}
|
||||
|
||||
if err := cpusetEnsureParent(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func cpusetCopyIfNeeded(current, parent string) error {
|
||||
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if isEmptyCpuset(currentCpus) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if isEmptyCpuset(currentMems) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isEmptyCpuset(str string) bool {
|
||||
return str == "" || str == "\n"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||
if err := s.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
39
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
39
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type DevicesGroup struct{}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
if path == "" {
|
||||
// Return error here, since devices cgroup
|
||||
// is a hard requirement for container's security.
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
if cgroups.DevicesSetV1 == nil {
|
||||
if len(r.Devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
return cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return cgroups.DevicesSetV1(path, r)
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
15
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
15
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||
// in a particular format but get some garbage instead.
|
||||
func malformedLine(path, file, line string) error {
|
||||
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||
}
|
158
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
158
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
@ -0,0 +1,158 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type FreezerGroup struct{}
|
||||
|
||||
func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
switch r.Freezer {
|
||||
case configs.Frozen:
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
// Freezing failed, and it is bad and dangerous
|
||||
// to leave the cgroup in FROZEN or FREEZING
|
||||
// state, so (try to) thaw it back.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
}
|
||||
}()
|
||||
|
||||
// As per older kernel docs (freezer-subsystem.txt before
|
||||
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||
// userspace should either retry or thaw. While current
|
||||
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup v1 while new processes keep appearing in it
|
||||
// (either via fork/clone or by writing new PIDs to
|
||||
// cgroup.procs).
|
||||
//
|
||||
// The numbers below are empirically chosen to have a decent
|
||||
// chance to succeed in various scenarios ("runc pause/unpause
|
||||
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||
//
|
||||
// Adding any amount of sleep in between retries did not
|
||||
// increase the chances of successful freeze in "pause/unpause
|
||||
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||
// sleep helped for the case where the system is extremely slow
|
||||
// (CentOS 7 VM on GHA CI).
|
||||
//
|
||||
// Alas, this is still a game of chances, since the real fix
|
||||
// belong to the kernel (cgroup v2 do not have this bug).
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
if i%50 == 49 {
|
||||
// Occasional thaw and sleep improves
|
||||
// the chances to succeed in freezing
|
||||
// in case new processes keep appearing
|
||||
// in the cgroup.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i%25 == 24 {
|
||||
// Occasional short sleep before reading
|
||||
// the state back also improves the chances to
|
||||
// succeed in freezing in case of a very slow
|
||||
// system.
|
||||
time.Sleep(10 * time.Microsecond)
|
||||
}
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
state = strings.TrimSpace(state)
|
||||
switch state {
|
||||
case "FREEZING":
|
||||
continue
|
||||
case string(configs.Frozen):
|
||||
if i > 1 {
|
||||
logrus.Debugf("frozen after %d retries", i)
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
// should never happen
|
||||
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||
}
|
||||
}
|
||||
// Despite our best efforts, it got stuck in FREEZING.
|
||||
return errors.New("unable to freeze")
|
||||
case configs.Thawed:
|
||||
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||
}
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
// Find out whether the cgroup is frozen directly,
|
||||
// or indirectly via an ancestor.
|
||||
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat
|
||||
// it as being frozen.
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Frozen, err
|
||||
}
|
||||
switch self {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||
}
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
266
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
266
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
@ -0,0 +1,266 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&RdmaGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
&NameGroup{GroupName: "misc", Join: true},
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
func init() {
|
||||
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||
// it should join the cgroups v2.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats fills in the stats for the subsystem.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||
// subsystems use resources to pre-configure the cgroup parents
|
||||
// before creating or joining it.
|
||||
Apply(path string, r *configs.Resources, pid int) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string) (*Manager, error) {
|
||||
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||
// cgroups.Resources to not be nil in Apply.
|
||||
if cg.Resources == nil {
|
||||
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||
}
|
||||
if cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
}
|
||||
// Handle some specific syscall errors.
|
||||
var errno unix.Errno
|
||||
if errors.As(err, &errno) {
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (retErr error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
c := m.cgroups
|
||||
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
p, ok := m.paths[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems here, but do delete the path from
|
||||
// the m.paths map, since it is either non-existent and could not
|
||||
// be created, or the pid could not be added to it.
|
||||
//
|
||||
// Cases where limits for the subsystem have been set are handled
|
||||
// later by Set, which fails with a friendly error (see
|
||||
// if path == "" in Set).
|
||||
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||
retErr = cgroups.ErrRootless
|
||||
delete(m.paths, name)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return retErr
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return cgroups.RemovePaths(m.paths)
|
||||
}
|
||||
|
||||
func (m *Manager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
// When rootless is true, errors from the device subsystem
|
||||
// are ignored, as it is really not expected to work.
|
||||
if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) {
|
||||
continue
|
||||
}
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
path := m.Path("freezer")
|
||||
if path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
dir := m.Path("freezer")
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
return freezer.GetState(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *Manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
84
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
84
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type HugetlbGroup struct{}
|
||||
|
||||
func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
const suffix = ".limit_in_bytes"
|
||||
skipRsvd := false
|
||||
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
prefix := "hugetlb." + hugetlb.Pagesize
|
||||
val := strconv.FormatUint(hugetlb.Limit, 10)
|
||||
if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil {
|
||||
return err
|
||||
}
|
||||
if skipRsvd {
|
||||
continue
|
||||
}
|
||||
if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
skipRsvd = true
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
rsvd := ".rsvd"
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
again:
|
||||
prefix := "hugetlb." + pageSize + rsvd
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes")
|
||||
if err != nil {
|
||||
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
|
||||
rsvd = ""
|
||||
goto again
|
||||
}
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
357
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
357
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
@ -0,0 +1,357 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemoryUsage = "memory.usage_in_bytes"
|
||||
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct{}
|
||||
|
||||
func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func setMemory(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||
if !errors.Is(err, unix.EBUSY) {
|
||||
return err
|
||||
}
|
||||
|
||||
// EBUSY means the kernel can't set new limit as it's too low
|
||||
// (lower than the current usage). Return more specific error.
|
||||
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
r.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
if err := setMemoryAndSwap(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ignore KernelMemory and KernelMemoryTCP
|
||||
|
||||
if r.MemoryReservation != 0 {
|
||||
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.OomKillDisable {
|
||||
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *r.MemorySwappiness <= 100 {
|
||||
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryData(path, "memsw")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{
|
||||
Usage: swapUsage.Usage - memoryUsage.Usage,
|
||||
Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt,
|
||||
}
|
||||
kernelUsage, err := getMemoryData(path, "kmem")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
|
||||
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
var (
|
||||
usage = moduleName + ".usage_in_bytes"
|
||||
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||
failcnt = moduleName + ".failcnt"
|
||||
limit = moduleName + ".limit_in_bytes"
|
||||
)
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as swap and kmem controllers
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if name == "kmem" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as kmem.limit_in_bytes has
|
||||
// been removed in newer kernels.
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||
const (
|
||||
maxColumns = math.MaxUint8 + 1
|
||||
file = "memory.numa_stat"
|
||||
)
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||
// and it looks like this:
|
||||
//
|
||||
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
var field *cgroups.PageStats
|
||||
|
||||
line := scanner.Text()
|
||||
columns := strings.SplitN(line, " ", maxColumns)
|
||||
for i, column := range columns {
|
||||
key, val, ok := strings.Cut(column, "=")
|
||||
// Some custom kernels have non-standard fields, like
|
||||
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||
// numa_exectime 0
|
||||
if !ok {
|
||||
if i == 0 {
|
||||
// Ignore/skip those.
|
||||
break
|
||||
} else {
|
||||
// The first column was already validated,
|
||||
// so be strict to the rest.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
}
|
||||
if i == 0 { // First column: key is name, val is total.
|
||||
field = getNUMAField(&stats, key)
|
||||
if field == nil { // unknown field (new kernel?)
|
||||
break
|
||||
}
|
||||
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
field.Nodes = map[uint8]uint64{}
|
||||
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||
if len(key) < 2 || key[0] != 'N' {
|
||||
// This is definitely an error.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
|
||||
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
usage, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
field.Nodes[uint8(n)] = usage
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||
switch name {
|
||||
case "total":
|
||||
return &stats.Total
|
||||
case "file":
|
||||
return &stats.File
|
||||
case "anon":
|
||||
return &stats.Anon
|
||||
case "unevictable":
|
||||
return &stats.Unevictable
|
||||
case "hierarchical_total":
|
||||
return &stats.Hierarchical.Total
|
||||
case "hierarchical_file":
|
||||
return &stats.Hierarchical.File
|
||||
case "hierarchical_anon":
|
||||
return &stats.Hierarchical.Anon
|
||||
case "hierarchical_unevictable":
|
||||
return &stats.Hierarchical.Unevictable
|
||||
}
|
||||
return nil
|
||||
}
|
31
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
31
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
if s.Join {
|
||||
// Ignore errors if the named cgroup does not exist.
|
||||
_ = apply(path, pid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
32
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
32
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct{}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.NetClsClassid != 0 {
|
||||
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
30
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
30
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct{}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, prioMap := range r.NetPrioIfpriomap {
|
||||
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
186
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
186
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
@ -0,0 +1,186 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var (
|
||||
cgroupRootLock sync.Mutex
|
||||
cgroupRoot string
|
||||
)
|
||||
|
||||
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inner, err := innerPath(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
path, err := subsysPath(root, inner, name)
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem
|
||||
// is considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer dir.Close()
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||
func rootPath() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
func innerPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(c.Path)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(c.Parent)
|
||||
cgName := utils.CleanPath(c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return innerPath, nil
|
||||
}
|
||||
|
||||
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(inner) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath for dind-like cases, when cgroupns is not
|
||||
// available. This is ugly.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, inner), nil
|
||||
}
|
||||
|
||||
func apply(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
24
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
24
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct{}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
62
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
62
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct{}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if r.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
25
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
25
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type RdmaGroup struct{}
|
||||
|
||||
func (s *RdmaGroup) Name() string {
|
||||
return "rdma"
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||
return fscommon.RdmaSet(path, r)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return fscommon.RdmaGetStats(path, stats)
|
||||
}
|
118
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
118
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpuSet(r *configs.Resources) bool {
|
||||
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil
|
||||
}
|
||||
|
||||
func setCpu(dirPath string, r *configs.Resources) error {
|
||||
if !isCpuSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.CPUIdle != nil {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.idle", strconv.FormatInt(*r.CPUIdle, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||
if r.CpuWeight != 0 {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
var burst string
|
||||
if r.CpuBurst != nil {
|
||||
burst = strconv.FormatUint(*r.CpuBurst, 10)
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
|
||||
// Sometimes when the burst to be set is larger
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_burst exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
burst = ""
|
||||
}
|
||||
}
|
||||
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
|
||||
str := "max"
|
||||
if r.CpuQuota > 0 {
|
||||
str = strconv.FormatInt(r.CpuQuota, 10)
|
||||
}
|
||||
period := r.CpuPeriod
|
||||
if period == 0 {
|
||||
// This default value is documented in
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
str += " " + strconv.FormatUint(period, 10)
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
|
||||
return err
|
||||
}
|
||||
if burst != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_usec":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
28
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
28
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpusetSet(r *configs.Resources) bool {
|
||||
return r.CpusetCpus != "" || r.CpusetMems != ""
|
||||
}
|
||||
|
||||
func setCpuset(dirPath string, r *configs.Resources) error {
|
||||
if !isCpusetSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
152
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
152
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func supportedControllers() (string, error) {
|
||||
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
|
||||
}
|
||||
|
||||
// needAnyControllers returns whether we enable some supported controllers or not,
|
||||
// based on (1) controllers available and (2) resources that are being set.
|
||||
// We don't check "pseudo" controllers such as
|
||||
// "freezer" and "devices".
|
||||
func needAnyControllers(r *configs.Resources) (bool, error) {
|
||||
if r == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// list of all available controllers
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
avail := make(map[string]struct{})
|
||||
for _, ctr := range strings.Fields(content) {
|
||||
avail[ctr] = struct{}{}
|
||||
}
|
||||
|
||||
// check whether the controller if available or not
|
||||
have := func(controller string) bool {
|
||||
_, ok := avail[controller]
|
||||
return ok
|
||||
}
|
||||
|
||||
if isPidsSet(r) && have("pids") {
|
||||
return true, nil
|
||||
}
|
||||
if isMemorySet(r) && have("memory") {
|
||||
return true, nil
|
||||
}
|
||||
if isIoSet(r) && have("io") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpuSet(r) && have("cpu") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpusetSet(r) && have("cpuset") {
|
||||
return true, nil
|
||||
}
|
||||
if isHugeTlbSet(r) && have("hugetlb") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// containsDomainController returns whether the current config contains domain controller or not.
|
||||
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||
func containsDomainController(r *configs.Resources) bool {
|
||||
return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
|
||||
}
|
||||
|
||||
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||
func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
if !strings.HasPrefix(path, UnifiedMountpoint) {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
cgTypeFile = "cgroup.type"
|
||||
cgStCtlFile = "cgroup.subtree_control"
|
||||
)
|
||||
ctrs := strings.Fields(content)
|
||||
res := "+" + strings.Join(ctrs, " +")
|
||||
|
||||
elements := strings.Split(path, "/")
|
||||
elements = elements[3:]
|
||||
current := "/sys/fs"
|
||||
for i, e := range elements {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0o755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
current := current
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
|
||||
cgType = strings.TrimSpace(cgType)
|
||||
switch cgType {
|
||||
// If the cgroup is in an invalid mode (usually this means there's an internal
|
||||
// process in the cgroup tree, because we created a cgroup under an
|
||||
// already-populated-by-other-processes cgroup), then we have to error out if
|
||||
// the user requested controllers which are not thread-aware. However, if all
|
||||
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||
// threaded mode.
|
||||
case "domain invalid":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||
} else {
|
||||
// Not entirely correct (in theory we'd always want to be a domain --
|
||||
// since that means we're a properly delegated cgroup subtree) but in
|
||||
// this case there's not much we can do and it's better than giving an
|
||||
// error.
|
||||
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
|
||||
}
|
||||
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
|
||||
// (and you cannot usually take a cgroup out of threaded mode).
|
||||
case "domain threaded":
|
||||
fallthrough
|
||||
case "threaded":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
|
||||
}
|
||||
}
|
||||
}
|
||||
// enable all supported controllers
|
||||
if i < len(elements)-1 {
|
||||
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
|
||||
// try write one by one
|
||||
allCtrs := strings.Split(res, " ")
|
||||
for _, ctr := range allCtrs {
|
||||
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
|
||||
}
|
||||
}
|
||||
// Some controllers might not be enabled when rootless or containerized,
|
||||
// but we don't catch the error here. (Caught in setXXX() functions.)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
102
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
102
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
|
||||
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(cgPath)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(cgParent)
|
||||
cgName := utils.CleanPath(cgName)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
|
||||
// we don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// The current user scope most probably has tasks in it already,
|
||||
// making it impossible to enable controllers for its sub-cgroup.
|
||||
// A parent cgroup (with no tasks in it) is what we need.
|
||||
ownCgroup = filepath.Dir(ownCgroup)
|
||||
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
for s.Scan() {
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
127
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
127
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var stateStr string
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
stateStr = "1"
|
||||
case configs.Thawed:
|
||||
stateStr = "0"
|
||||
default:
|
||||
return fmt.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
|
||||
if err != nil {
|
||||
// We can ignore this request as long as the user didn't ask us to
|
||||
// freeze the container (since without the freezer cgroup, that's a
|
||||
// no-op).
|
||||
if state != configs.Frozen {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("freezer not supported: %w", err)
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
if _, err := fd.WriteString(stateStr); err != nil {
|
||||
return err
|
||||
}
|
||||
// Confirm that the cgroup did actually change states.
|
||||
if actualState, err := readFreezer(dirPath, fd); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
return readFreezer(dirPath, fd)
|
||||
}
|
||||
|
||||
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
|
||||
if _, err := fd.Seek(0, 0); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
state := make([]byte, 2)
|
||||
if _, err := fd.Read(state); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch string(state) {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return waitFrozen(dirPath)
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
}
|
||||
|
||||
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
|
||||
func waitFrozen(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// XXX: Simple wait/read/retry is used here. An implementation
|
||||
// based on poll(2) or inotify(7) is possible, but it makes the code
|
||||
// much more complicated. Maybe address this later.
|
||||
const (
|
||||
// Perform maxIter with waitTime in between iterations.
|
||||
waitTime = 10 * time.Millisecond
|
||||
maxIter = 1000
|
||||
)
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for i := 0; scanner.Scan(); {
|
||||
if i == maxIter {
|
||||
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
|
||||
}
|
||||
line := scanner.Text()
|
||||
val := strings.TrimPrefix(line, "frozen ")
|
||||
if val != line { // got prefix
|
||||
if val[0] == '1' {
|
||||
return configs.Frozen, nil
|
||||
}
|
||||
|
||||
i++
|
||||
// wait, then re-read
|
||||
time.Sleep(waitTime)
|
||||
_, err := fd.Seek(0, 0)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Should only reach here either on read error,
|
||||
// or if the file does not contain "frozen " line.
|
||||
return configs.Undefined, scanner.Err()
|
||||
}
|
318
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
318
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
@ -0,0 +1,318 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
type Manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
}
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string) (*Manager, error) {
|
||||
if dirPath == "" {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
m := &Manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *Manager) getControllers() error {
|
||||
if m.controllers != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
if m.config.Rootless && m.config.Path == "" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
fields := strings.Fields(data)
|
||||
m.controllers = make(map[string]struct{}, len(fields))
|
||||
for _, c := range fields {
|
||||
m.controllers[c] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
|
||||
// Related tests:
|
||||
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.config.Rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||
return cgroups.ErrRootless
|
||||
}
|
||||
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
var errs []error
|
||||
|
||||
st := cgroups.NewStats()
|
||||
|
||||
// pids (since kernel 4.5)
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
// Note cpu.stat is available even if the controller is not enabled.
|
||||
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// PSI (since kernel 4.20).
|
||||
var err error
|
||||
if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// misc (since kernel 5.13)
|
||||
if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.config.Rootless {
|
||||
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.config.Resources == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return cgroups.RemovePath(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) Path(_ string) string {
|
||||
return m.dirPath
|
||||
}
|
||||
|
||||
func (m *Manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
// pids (since kernel 4.5)
|
||||
if err := setPids(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := setMemory(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := setIo(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if err := setCpu(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
//
|
||||
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, r); err != nil {
|
||||
if !m.config.Rootless || errors.Is(err, cgroups.ErrDevicesUnsupported) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if err := setCpuset(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := m.setUnified(r.Unified); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources = r
|
||||
return nil
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, r *configs.Resources) error {
|
||||
if cgroups.DevicesSetV2 == nil {
|
||||
if len(r.Devices) > 0 {
|
||||
return cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return cgroups.DevicesSetV2(dirPath, r)
|
||||
}
|
||||
|
||||
func (m *Manager) setUnified(res map[string]string) error {
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if err := cgroups.WriteFileByLine(m.dirPath, k, v); err != nil {
|
||||
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
|
||||
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
|
||||
// Check if a controller is available,
|
||||
// to give more specific error if not.
|
||||
sk := strings.SplitN(k, ".", 2)
|
||||
if len(sk) != 2 {
|
||||
return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
c := sk[0]
|
||||
if _, ok := m.controllers[c]; !ok && c != "cgroup" {
|
||||
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.dirPath
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return getFreezer(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return cgroups.PathExists(m.dirPath)
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *Manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.dirPath)
|
||||
if err != nil && m.config.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
||||
|
||||
func CheckMemoryUsage(dirPath string, r *configs.Resources) error {
|
||||
if !r.MemoryCheckBeforeUpdate {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Memory <= 0 && r.MemorySwap <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current")
|
||||
if err != nil {
|
||||
// This check is on best-effort basis, so if we can't read the
|
||||
// current usage (cgroup not yet created, or any other error),
|
||||
// we should not fail.
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.MemorySwap > 0 {
|
||||
if uint64(r.MemorySwap) <= usage {
|
||||
return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, usage)
|
||||
}
|
||||
}
|
||||
|
||||
if r.Memory > 0 {
|
||||
if uint64(r.Memory) <= usage {
|
||||
return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, usage)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
70
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
70
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isHugeTlbSet(r *configs.Resources) bool {
|
||||
return len(r.HugetlbLimit) > 0
|
||||
}
|
||||
|
||||
func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||
if !isHugeTlbSet(r) {
|
||||
return nil
|
||||
}
|
||||
const suffix = ".max"
|
||||
skipRsvd := false
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
prefix := "hugetlb." + hugetlb.Pagesize
|
||||
val := strconv.FormatUint(hugetlb.Limit, 10)
|
||||
if err := cgroups.WriteFile(dirPath, prefix+suffix, val); err != nil {
|
||||
return err
|
||||
}
|
||||
if skipRsvd {
|
||||
continue
|
||||
}
|
||||
if err := cgroups.WriteFile(dirPath, prefix+".rsvd"+suffix, val); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
skipRsvd = true
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
rsvd := ".rsvd"
|
||||
for _, pagesize := range cgroups.HugePageSizes() {
|
||||
again:
|
||||
prefix := "hugetlb." + pagesize + rsvd
|
||||
value, err := fscommon.GetCgroupParamUint(dirPath, prefix+".current")
|
||||
if err != nil {
|
||||
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
|
||||
rsvd = ""
|
||||
goto again
|
||||
}
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
value, err = fscommon.GetValueByKey(dirPath, prefix+".events", "max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
193
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
193
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
@ -0,0 +1,193 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isIoSet(r *configs.Resources) bool {
|
||||
return r.BlkioWeight != 0 ||
|
||||
len(r.BlkioWeightDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteIOPSDevice) > 0
|
||||
}
|
||||
|
||||
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
|
||||
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
|
||||
func bfqDeviceWeightSupported(bfq *os.File) bool {
|
||||
if bfq == nil {
|
||||
return false
|
||||
}
|
||||
_, _ = bfq.Seek(0, 0)
|
||||
buf := make([]byte, 32)
|
||||
_, _ = bfq.Read(buf)
|
||||
// If only a single number (default weight) if read back, we have older kernel.
|
||||
_, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64)
|
||||
return err != nil
|
||||
}
|
||||
|
||||
func setIo(dirPath string, r *configs.Resources) error {
|
||||
if !isIoSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If BFQ IO scheduler is available, use it.
|
||||
var bfq *os.File
|
||||
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
|
||||
var err error
|
||||
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
|
||||
if err == nil {
|
||||
defer bfq.Close()
|
||||
} else if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
if bfq != nil { // Use BFQ.
|
||||
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Fallback to io.weight with a conversion scheme.
|
||||
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
|
||||
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if bfqDeviceWeightSupported(bfq) {
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
|
||||
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, &parseError{Path: dirPath, File: name, Err: err}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "io.stat"
|
||||
values, err := readCgroup2MapFile(dirPath, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var parsedStats cgroups.BlkioStats
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
major, err := strconv.ParseUint(d[0], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Map to the cgroupv1 naming and layout (in separate tables).
|
||||
var targetTable *[]cgroups.BlkioStatEntry
|
||||
switch op {
|
||||
// Equivalent to cgroupv1's blkio.io_service_bytes.
|
||||
case "rbytes":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
case "wbytes":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
// Equivalent to cgroupv1's blkio.io_serviced.
|
||||
case "rios":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
case "wios":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
default:
|
||||
// Skip over entries we cannot map to cgroupv1 stats for now.
|
||||
// In the future we should expand the stats struct to include
|
||||
// them.
|
||||
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
|
||||
continue
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
*targetTable = append(*targetTable, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = parsedStats
|
||||
return nil
|
||||
}
|
242
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
242
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
@ -0,0 +1,242 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// numToStr converts an int64 value to a string for writing to a
|
||||
// cgroupv2 files with .min, .max, .low, or .high suffix.
|
||||
// The value of -1 is converted to "max" for cgroupv1 compatibility
|
||||
// (which used to write -1 to remove the limit).
|
||||
func numToStr(value int64) (ret string) {
|
||||
switch {
|
||||
case value == 0:
|
||||
ret = ""
|
||||
case value == -1:
|
||||
ret = "max"
|
||||
default:
|
||||
ret = strconv.FormatInt(value, 10)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func isMemorySet(r *configs.Resources) bool {
|
||||
return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0
|
||||
}
|
||||
|
||||
func setMemory(dirPath string, r *configs.Resources) error {
|
||||
if !isMemorySet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := CheckMemoryUsage(dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
swapStr := numToStr(swap)
|
||||
if swapStr == "" && swap == 0 && r.MemorySwap > 0 {
|
||||
// memory and memorySwap set to the same value -- disable swap
|
||||
swapStr = "0"
|
||||
}
|
||||
// never write empty string to `memory.swap.max`, it means set to 0.
|
||||
if swapStr != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
|
||||
// If swap is not enabled, silently ignore setting to max or disabling it.
|
||||
if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if val := numToStr(r.Memory); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if val := numToStr(r.MemoryReservation); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||
// cgroup v2 is always hierarchical.
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
|
||||
// The root cgroup does not have memory.{current,max,peak}
|
||||
// so emulate those using data from /proc/meminfo and
|
||||
// /sys/fs/cgroup/memory.stat
|
||||
return rootStatsFromMeminfo(stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapOnlyUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapOnlyUsage = swapOnlyUsage
|
||||
swapUsage := swapOnlyUsage
|
||||
// As cgroup v1 reports SwapUsage values as mem+swap combined,
|
||||
// while in cgroup v2 swap values do not include memory,
|
||||
// report combined mem+swap for v1 compatibility.
|
||||
swapUsage.Usage += memoryUsage.Usage
|
||||
if swapUsage.Limit != math.MaxUint64 {
|
||||
swapUsage.Limit += memoryUsage.Limit
|
||||
}
|
||||
// The `MaxUsage` of mem+swap cannot simply combine mem with
|
||||
// swap. So set it to 0 for v1 compatibility.
|
||||
swapUsage.MaxUsage = 0
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
usage := moduleName + ".current"
|
||||
limit := moduleName + ".max"
|
||||
maxUsage := moduleName + ".peak"
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore EEXIST as there's no swap accounting
|
||||
// if kernel CONFIG_MEMCG_SWAP is not set or
|
||||
// swapaccount=0 kernel boot parameter is given.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
// `memory.peak` since kernel 5.19
|
||||
// `memory.swap.peak` since kernel 6.5
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func rootStatsFromMeminfo(stats *cgroups.Stats) error {
|
||||
const file = "/proc/meminfo"
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Fields we are interested in.
|
||||
var (
|
||||
swap_free uint64
|
||||
swap_total uint64
|
||||
)
|
||||
mem := map[string]*uint64{
|
||||
"SwapFree": &swap_free,
|
||||
"SwapTotal": &swap_total,
|
||||
}
|
||||
|
||||
found := 0
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.SplitN(sc.Text(), ":", 3)
|
||||
if len(parts) != 2 {
|
||||
// Should not happen.
|
||||
continue
|
||||
}
|
||||
k := parts[0]
|
||||
p, ok := mem[k]
|
||||
if !ok {
|
||||
// Unknown field -- not interested.
|
||||
continue
|
||||
}
|
||||
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{File: file, Err: errors.New("bad value for " + k)}
|
||||
}
|
||||
|
||||
found++
|
||||
if found == len(mem) {
|
||||
// Got everything we need -- skip the rest.
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: "", File: file, Err: err}
|
||||
}
|
||||
|
||||
// cgroup v1 `usage_in_bytes` reports memory usage as the sum of
|
||||
// - rss (NR_ANON_MAPPED)
|
||||
// - cache (NR_FILE_PAGES)
|
||||
// cgroup v1 reports SwapUsage values as mem+swap combined
|
||||
// cgroup v2 reports rss and cache as anon and file.
|
||||
// sum `anon` + `file` to report the same value as `usage_in_bytes` in v1.
|
||||
// sum swap usage as combined mem+swap usage for consistency as well.
|
||||
stats.MemoryStats.Usage.Usage = stats.MemoryStats.Stats["anon"] + stats.MemoryStats.Stats["file"]
|
||||
stats.MemoryStats.Usage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||
stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage += stats.MemoryStats.Usage.Usage
|
||||
|
||||
return nil
|
||||
}
|
52
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
generated
vendored
Normal file
52
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/misc.go
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
func statMisc(dirPath string, stats *cgroups.Stats) error {
|
||||
for _, file := range []string{"current", "events"} {
|
||||
fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s := bufio.NewScanner(fd)
|
||||
for s.Scan() {
|
||||
key, value, err := fscommon.ParseKeyValue(s.Text())
|
||||
if err != nil {
|
||||
fd.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
key = strings.TrimSuffix(key, ".max")
|
||||
|
||||
if _, ok := stats.MiscStats[key]; !ok {
|
||||
stats.MiscStats[key] = cgroups.MiscStats{}
|
||||
}
|
||||
|
||||
tmp := stats.MiscStats[key]
|
||||
|
||||
switch file {
|
||||
case "current":
|
||||
tmp.Usage = value
|
||||
case "events":
|
||||
tmp.Events = value
|
||||
}
|
||||
|
||||
stats.MiscStats[key] = tmp
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
72
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
72
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isPidsSet(r *configs.Resources) bool {
|
||||
return r.PidsLimit != 0
|
||||
}
|
||||
|
||||
func setPids(dirPath string, r *configs.Resources) error {
|
||||
if !isPidsSet(r) {
|
||||
return nil
|
||||
}
|
||||
if val := numToStr(r.PidsLimit); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := strings.Count(contents, "\n")
|
||||
stats.PidsStats.Current = uint64(pids)
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return statPidsFromCgroupProcs(dirPath, stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
89
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
generated
vendored
Normal file
89
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/psi.go
generated
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) {
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// Kernel < 4.20, or CONFIG_PSI is not set,
|
||||
// or PSI stats are turned off for the cgroup
|
||||
// ("echo 0 > cgroup.pressure", kernel >= 6.1).
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var psistats cgroups.PSIStats
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.Fields(sc.Text())
|
||||
var pv *cgroups.PSIData
|
||||
switch parts[0] {
|
||||
case "some":
|
||||
pv = &psistats.Some
|
||||
case "full":
|
||||
pv = &psistats.Full
|
||||
}
|
||||
if pv != nil {
|
||||
*pv, err = parsePSIData(parts[1:])
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
// Some kernels (e.g. CS9) may return ENOTSUP on read
|
||||
// if psi=1 kernel cmdline parameter is required.
|
||||
return nil, nil
|
||||
}
|
||||
return nil, &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return &psistats, nil
|
||||
}
|
||||
|
||||
func parsePSIData(psi []string) (cgroups.PSIData, error) {
|
||||
data := cgroups.PSIData{}
|
||||
for _, f := range psi {
|
||||
kv := strings.SplitN(f, "=", 2)
|
||||
if len(kv) != 2 {
|
||||
return data, fmt.Errorf("invalid psi data: %q", f)
|
||||
}
|
||||
var pv *float64
|
||||
switch kv[0] {
|
||||
case "avg10":
|
||||
pv = &data.Avg10
|
||||
case "avg60":
|
||||
pv = &data.Avg60
|
||||
case "avg300":
|
||||
pv = &data.Avg300
|
||||
case "total":
|
||||
v, err := strconv.ParseUint(kv[1], 10, 64)
|
||||
if err != nil {
|
||||
return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
data.Total = v
|
||||
}
|
||||
if pv != nil {
|
||||
v, err := strconv.ParseFloat(kv[1], 64)
|
||||
if err != nil {
|
||||
return data, fmt.Errorf("invalid %s PSI value: %w", kv[0], err)
|
||||
}
|
||||
*pv = v
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
121
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
121
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// parseRdmaKV parses raw string to RdmaEntry.
|
||||
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
|
||||
var value uint32
|
||||
|
||||
parts := strings.SplitN(raw, "=", 3)
|
||||
|
||||
if len(parts) != 2 {
|
||||
return errors.New("Unable to parse RDMA entry")
|
||||
}
|
||||
|
||||
k, v := parts[0], parts[1]
|
||||
|
||||
if v == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
val64, err := strconv.ParseUint(v, 10, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value = uint32(val64)
|
||||
}
|
||||
if k == "hca_handle" {
|
||||
entry.HcaHandles = value
|
||||
} else if k == "hca_object" {
|
||||
entry.HcaObjects = value
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
|
||||
// example entry: mlx4_0 hca_handle=2 hca_object=2000
|
||||
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
|
||||
rdmaEntries := make([]cgroups.RdmaEntry, 0)
|
||||
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fd.Close() //nolint:errorlint
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
parts := strings.SplitN(scanner.Text(), " ", 4)
|
||||
if len(parts) == 3 {
|
||||
entry := new(cgroups.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
err = parseRdmaKV(parts[1], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = parseRdmaKV(parts[2], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
rdmaEntries = append(rdmaEntries, *entry)
|
||||
}
|
||||
}
|
||||
return rdmaEntries, scanner.Err()
|
||||
}
|
||||
|
||||
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
|
||||
func RdmaGetStats(path string, stats *cgroups.Stats) error {
|
||||
currentEntries, err := readRdmaEntries(path, "rdma.current")
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
maxEntries, err := readRdmaEntries(path, "rdma.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If device got removed between reading two files, ignore returning stats.
|
||||
if len(currentEntries) != len(maxEntries) {
|
||||
return nil
|
||||
}
|
||||
|
||||
stats.RdmaStats = cgroups.RdmaStats{
|
||||
RdmaLimit: maxEntries,
|
||||
RdmaCurrent: currentEntries,
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createCmdString(device string, limits configs.LinuxRdma) string {
|
||||
cmdString := device
|
||||
if limits.HcaHandles != nil {
|
||||
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
|
||||
}
|
||||
if limits.HcaObjects != nil {
|
||||
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
|
||||
}
|
||||
return cmdString
|
||||
}
|
||||
|
||||
// RdmaSet sets RDMA resources.
|
||||
func RdmaSet(path string, r *configs.Resources) error {
|
||||
for device, limits := range r.Rdma {
|
||||
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
145
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
145
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
var (
|
||||
// Deprecated: use cgroups.OpenFile instead.
|
||||
OpenFile = cgroups.OpenFile
|
||||
// Deprecated: use cgroups.ReadFile instead.
|
||||
ReadFile = cgroups.ReadFile
|
||||
// Deprecated: use cgroups.WriteFile instead.
|
||||
WriteFile = cgroups.WriteFile
|
||||
)
|
||||
|
||||
// ParseError records a parse error details, including the file path.
|
||||
type ParseError struct {
|
||||
Path string
|
||||
File string
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *ParseError) Error() string {
|
||||
return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
func (e *ParseError) Unwrap() error { return e.Err }
|
||||
|
||||
// ParseUint converts a string to an uint64 integer.
|
||||
// Negative values are returned at zero as, due to kernel bugs,
|
||||
// some of the memory cgroup stats can be negative.
|
||||
func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// ParseKeyValue parses a space-separated "name value" kind of cgroup
|
||||
// parameter and returns its key as a string, and its value as uint64
|
||||
// (ParseUint is used to convert the value). For example,
|
||||
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
|
||||
func ParseKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.SplitN(t, " ", 3)
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("line %q is not in key value format", t)
|
||||
}
|
||||
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
}
|
||||
|
||||
// GetValueByKey reads a key-value pairs from the specified cgroup file,
|
||||
// and returns a value of the specified key. ParseUint is used for value
|
||||
// conversion.
|
||||
func GetValueByKey(path, file, key string) (uint64, error) {
|
||||
content, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
arr := strings.Split(line, " ")
|
||||
if len(arr) == 2 && arr[0] == key {
|
||||
val, err := ParseUint(arr[1], 10, 64)
|
||||
if err != nil {
|
||||
err = &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return val, err
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
|
||||
// If the value read is "max", the math.MaxUint64 is returned.
|
||||
func GetCgroupParamUint(path, file string) (uint64, error) {
|
||||
contents, err := GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := ParseUint(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
|
||||
// If the value read is "max", the math.MaxInt64 is returned.
|
||||
func GetCgroupParamInt(path, file string) (int64, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxInt64, nil
|
||||
}
|
||||
|
||||
res, err := strconv.ParseInt(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamString reads a string from the specified cgroup file.
|
||||
func GetCgroupParamString(path, file string) (string, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(contents), nil
|
||||
}
|
27
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
27
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetAllPids returns all pids from the cgroup identified by path, and all its
|
||||
// sub-cgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
if !d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
}
|
78
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
78
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/manager/new.go
generated
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
package manager
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// New returns the instance of a cgroup manager, which is chosen
|
||||
// based on the local environment (whether cgroup v1 or v2 is used)
|
||||
// and the config (whether config.Systemd is set or not).
|
||||
func New(config *configs.Cgroup) (cgroups.Manager, error) {
|
||||
return NewWithPaths(config, nil)
|
||||
}
|
||||
|
||||
// NewWithPaths is similar to New, and can be used in case cgroup paths
|
||||
// are already well known, which can save some resources.
|
||||
//
|
||||
// For cgroup v1, the keys are controller/subsystem name, and the values
|
||||
// are absolute filesystem paths to the appropriate cgroups.
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "" (empty string), and the value
|
||||
// is the unified cgroup path.
|
||||
func NewWithPaths(config *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
return nil, errors.New("cgroups/manager.New: config must not be nil")
|
||||
}
|
||||
if config.Systemd && !systemd.IsRunningSystemd() {
|
||||
return nil, errors.New("systemd not running on this host, cannot use systemd cgroups manager")
|
||||
}
|
||||
|
||||
// Cgroup v2 aka unified hierarchy.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
path, err := getUnifiedPath(paths)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("manager.NewWithPaths: inconsistent paths: %w", err)
|
||||
}
|
||||
if config.Systemd {
|
||||
return systemd.NewUnifiedManager(config, path)
|
||||
}
|
||||
return fs2.NewManager(config, path)
|
||||
}
|
||||
|
||||
// Cgroup v1.
|
||||
if config.Systemd {
|
||||
return systemd.NewLegacyManager(config, paths)
|
||||
}
|
||||
|
||||
return fs.NewManager(config, paths)
|
||||
}
|
||||
|
||||
// getUnifiedPath is an implementation detail of libcontainer.
|
||||
// Historically, libcontainer.Create saves cgroup paths as per-subsystem path
|
||||
// map (as returned by cm.GetPaths(""), but with v2 we only have one single
|
||||
// unified path (with "" as a key).
|
||||
//
|
||||
// This function converts from that map to string (using "" as a key),
|
||||
// and also checks that the map itself is sane.
|
||||
func getUnifiedPath(paths map[string]string) (string, error) {
|
||||
if len(paths) > 1 {
|
||||
return "", fmt.Errorf("expected a single path, got %+v", paths)
|
||||
}
|
||||
path := paths[""]
|
||||
// can be empty
|
||||
if path != "" {
|
||||
if filepath.Clean(path) != path || !filepath.IsAbs(path) {
|
||||
return "", fmt.Errorf("invalid path: %q", path)
|
||||
}
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
200
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
200
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
package cgroups
|
||||
|
||||
type ThrottlingData struct {
|
||||
// Number of periods with throttling active
|
||||
Periods uint64 `json:"periods,omitempty"`
|
||||
// Number of periods when the container hit its throttling limit.
|
||||
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
|
||||
// Aggregate time the container was throttled for in nanoseconds.
|
||||
ThrottledTime uint64 `json:"throttled_time,omitempty"`
|
||||
}
|
||||
|
||||
// CpuUsage denotes the usage of a CPU.
|
||||
// All CPU stats are aggregate since container inception.
|
||||
type CpuUsage struct {
|
||||
// Total CPU time consumed.
|
||||
// Units: nanoseconds.
|
||||
TotalUsage uint64 `json:"total_usage,omitempty"`
|
||||
// Total CPU time consumed per core.
|
||||
// Units: nanoseconds.
|
||||
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
|
||||
// CPU time consumed per core in kernel mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
|
||||
// CPU time consumed per core in user mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
|
||||
// Time spent by tasks of the cgroup in kernel mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
|
||||
// Time spent by tasks of the cgroup in user mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInUsermode uint64 `json:"usage_in_usermode"`
|
||||
}
|
||||
|
||||
type PSIData struct {
|
||||
Avg10 float64 `json:"avg10"`
|
||||
Avg60 float64 `json:"avg60"`
|
||||
Avg300 float64 `json:"avg300"`
|
||||
Total uint64 `json:"total"`
|
||||
}
|
||||
|
||||
type PSIStats struct {
|
||||
Some PSIData `json:"some,omitempty"`
|
||||
Full PSIData `json:"full,omitempty"`
|
||||
}
|
||||
|
||||
type CpuStats struct {
|
||||
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
|
||||
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
type CPUSetStats struct {
|
||||
// List of the physical numbers of the CPUs on which processes
|
||||
// in that cpuset are allowed to execute
|
||||
CPUs []uint16 `json:"cpus,omitempty"`
|
||||
// cpu_exclusive flag
|
||||
CPUExclusive uint64 `json:"cpu_exclusive"`
|
||||
// List of memory nodes on which processes in that cpuset
|
||||
// are allowed to allocate memory
|
||||
Mems []uint16 `json:"mems,omitempty"`
|
||||
// mem_hardwall flag
|
||||
MemHardwall uint64 `json:"mem_hardwall"`
|
||||
// mem_exclusive flag
|
||||
MemExclusive uint64 `json:"mem_exclusive"`
|
||||
// memory_migrate flag
|
||||
MemoryMigrate uint64 `json:"memory_migrate"`
|
||||
// memory_spread page flag
|
||||
MemorySpreadPage uint64 `json:"memory_spread_page"`
|
||||
// memory_spread slab flag
|
||||
MemorySpreadSlab uint64 `json:"memory_spread_slab"`
|
||||
// memory_pressure
|
||||
MemoryPressure uint64 `json:"memory_pressure"`
|
||||
// sched_load balance flag
|
||||
SchedLoadBalance uint64 `json:"sched_load_balance"`
|
||||
// sched_relax_domain_level
|
||||
SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"`
|
||||
}
|
||||
|
||||
type MemoryData struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
type MemoryStats struct {
|
||||
// memory used for cache
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
// usage of memory
|
||||
Usage MemoryData `json:"usage,omitempty"`
|
||||
// usage of memory + swap
|
||||
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||
// usage of swap only
|
||||
SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"`
|
||||
// usage of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// usage of memory pages by NUMA node
|
||||
// see chapter 5.6 of memory controller documentation
|
||||
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
type PageUsageByNUMA struct {
|
||||
// Embedding is used as types can't be recursive.
|
||||
PageUsageByNUMAInner
|
||||
Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
|
||||
}
|
||||
|
||||
type PageUsageByNUMAInner struct {
|
||||
Total PageStats `json:"total,omitempty"`
|
||||
File PageStats `json:"file,omitempty"`
|
||||
Anon PageStats `json:"anon,omitempty"`
|
||||
Unevictable PageStats `json:"unevictable,omitempty"`
|
||||
}
|
||||
|
||||
type PageStats struct {
|
||||
Total uint64 `json:"total,omitempty"`
|
||||
Nodes map[uint8]uint64 `json:"nodes,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
// active pids hard limit
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
Op string `json:"op,omitempty"`
|
||||
Value uint64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStats struct {
|
||||
// number of bytes transferred to and from the block device
|
||||
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
|
||||
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
|
||||
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
|
||||
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
|
||||
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
|
||||
PSI *PSIStats `json:"psi,omitempty"`
|
||||
}
|
||||
|
||||
type HugetlbStats struct {
|
||||
// current res_counter usage for hugetlb
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
// maximum usage ever recorded.
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
// number of times hugetlb usage allocation failure.
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type RdmaEntry struct {
|
||||
Device string `json:"device,omitempty"`
|
||||
HcaHandles uint32 `json:"hca_handles,omitempty"`
|
||||
HcaObjects uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
||||
|
||||
type RdmaStats struct {
|
||||
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
|
||||
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
|
||||
}
|
||||
|
||||
type MiscStats struct {
|
||||
// current resource usage for a key in misc
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
// number of times the resource usage was about to go over the max boundary
|
||||
Events uint64 `json:"events,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
|
||||
// the map is in the format "misc resource name: stats of the key"
|
||||
MiscStats map[string]MiscStats `json:"misc_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
||||
hugetlbStats := make(map[string]HugetlbStats)
|
||||
miscStats := make(map[string]MiscStats)
|
||||
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
|
||||
}
|
363
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
363
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
@ -0,0 +1,363 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
// Default kernel value for cpu quota period is 100000 us (100 ms), same for v1 and v2.
|
||||
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
|
||||
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
defCPUQuotaPeriod = uint64(100000)
|
||||
)
|
||||
|
||||
var (
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
|
||||
isRunningSystemdOnce sync.Once
|
||||
isRunningSystemd bool
|
||||
|
||||
// GenerateDeviceProps is a function to generate systemd device
|
||||
// properties, used by Set methods. Unless
|
||||
// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
|
||||
// package is imported, it is set to nil, so cgroup managers can't
|
||||
// configure devices.
|
||||
GenerateDeviceProps func(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error)
|
||||
)
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
isRunningSystemdOnce.Do(func() {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
isRunningSystemd = err == nil && fi.IsDir()
|
||||
})
|
||||
return isRunningSystemd
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return c.ScopePrefix + "-" + c.Name + ".scope"
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// This code should be in sync with getUnitName.
|
||||
func getUnitType(unitName string) string {
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
return "Slice"
|
||||
}
|
||||
return "Scope"
|
||||
}
|
||||
|
||||
// isDbusError returns true if the error is a specific dbus error.
|
||||
func isDbusError(err error, name string) bool {
|
||||
if err != nil {
|
||||
var derr dbus.Error
|
||||
if errors.As(err, &derr) {
|
||||
return strings.Contains(derr.Name, name)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
|
||||
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
|
||||
statusChan := make(chan string, 1)
|
||||
retry := true
|
||||
|
||||
retry:
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
if ignoreExist {
|
||||
// TODO: remove this hack.
|
||||
// This is kubelet making sure a slice exists (see
|
||||
// https://github.com/opencontainers/runc/pull/1124).
|
||||
return nil
|
||||
}
|
||||
if retry {
|
||||
// In case a unit with the same name exists, this may
|
||||
// be a leftover failed unit. Reset it, so systemd can
|
||||
// remove it, and retry once.
|
||||
err = resetFailedUnit(cm, unitName)
|
||||
if err != nil {
|
||||
logrus.Warnf("unable to reset failed unit: %v", err)
|
||||
}
|
||||
retry = false
|
||||
goto retry
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return errors.New("Timeout waiting for systemd to create " + unitName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(cm *dbusConnManager, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
return errors.New("Timed out while waiting for systemd to remove " + unitName)
|
||||
}
|
||||
}
|
||||
|
||||
// In case of a failed unit, let systemd remove it.
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func resetFailedUnit(cm *dbusConnManager, name string) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.ResetFailedUnitContext(context.TODO(), name)
|
||||
})
|
||||
}
|
||||
|
||||
func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
|
||||
var prop *systemdDbus.Property
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
|
||||
prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
|
||||
return Err
|
||||
})
|
||||
return prop, err
|
||||
}
|
||||
|
||||
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
||||
})
|
||||
}
|
||||
|
||||
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
||||
str := ""
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
var err error
|
||||
str, err = c.GetManagerProperty(name)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strconv.Unquote(str)
|
||||
}
|
||||
|
||||
func systemdVersion(cm *dbusConnManager) int {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := getManagerProperty(cm, "Version")
|
||||
if err == nil {
|
||||
version, err = systemdVersionAtoi(verStr)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.WithError(err).Error("unable to get systemd version")
|
||||
}
|
||||
})
|
||||
|
||||
return version
|
||||
}
|
||||
|
||||
// systemdVersionAtoi extracts a numeric systemd version from the argument.
|
||||
// The argument should be of the form: "v245.4-1.fc32", "245", "v245-1.fc32",
|
||||
// "245-1.fc32" (with or without quotes). The result for all of the above
|
||||
// should be 245.
|
||||
func systemdVersionAtoi(str string) (int, error) {
|
||||
// Unconditionally remove the leading prefix ("v).
|
||||
str = strings.TrimLeft(str, `"v`)
|
||||
// Match on the first integer we can grab.
|
||||
for i := 0; i < len(str); i++ {
|
||||
if str[i] < '0' || str[i] > '9' {
|
||||
// First non-digit: cut the tail.
|
||||
str = str[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
ver, err := strconv.Atoi(str)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("can't parse version: %w", err)
|
||||
}
|
||||
return ver, nil
|
||||
}
|
||||
|
||||
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
|
||||
" (setting will still be applied to cgroupfs)", sdVer)
|
||||
}
|
||||
}
|
||||
if quota != 0 || period != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
if period == 0 {
|
||||
// assume the default
|
||||
period = defCPUQuotaPeriod
|
||||
}
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
}
|
||||
|
||||
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||
if cpus == "" && mems == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer < 244 {
|
||||
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
||||
" (settings will still be applied to cgroupfs)", sdVer)
|
||||
return nil
|
||||
}
|
||||
|
||||
if cpus != "" {
|
||||
bits, err := RangeToBits(cpus)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
|
||||
cpus, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedCPUs", bits))
|
||||
}
|
||||
if mems != "" {
|
||||
bits, err := RangeToBits(mems)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
|
||||
mems, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedMemoryNodes", bits))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
|
||||
// corresponding set of systemd properties to configure the devices correctly.
|
||||
func generateDeviceProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
if GenerateDeviceProps == nil {
|
||||
if len(r.Devices) > 0 {
|
||||
return nil, cgroups.ErrDevicesUnsupported
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return GenerateDeviceProps(r, systemdVersion(cm))
|
||||
}
|
60
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
60
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RangeToBits converts a text representation of a CPU mask (as written to
|
||||
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
|
||||
// with the corresponding bits set (as consumed by systemd over dbus as
|
||||
// AllowedCPUs/AllowedMemoryNodes unit property value).
|
||||
func RangeToBits(str string) ([]byte, error) {
|
||||
bits := new(big.Int)
|
||||
|
||||
for _, r := range strings.Split(str, ",") {
|
||||
// allow extra spaces around
|
||||
r = strings.TrimSpace(r)
|
||||
// allow empty elements (extra commas)
|
||||
if r == "" {
|
||||
continue
|
||||
}
|
||||
startr, endr, ok := strings.Cut(r, "-")
|
||||
if ok {
|
||||
start, err := strconv.ParseUint(startr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
end, err := strconv.ParseUint(endr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if start > end {
|
||||
return nil, errors.New("invalid range: " + r)
|
||||
}
|
||||
for i := start; i <= end; i++ {
|
||||
bits.SetBit(bits, int(i), 1)
|
||||
}
|
||||
} else {
|
||||
val, err := strconv.ParseUint(startr, 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bits.SetBit(bits, int(val), 1)
|
||||
}
|
||||
}
|
||||
|
||||
ret := bits.Bytes()
|
||||
if len(ret) == 0 {
|
||||
// do not allow empty values
|
||||
return nil, errors.New("empty value")
|
||||
}
|
||||
|
||||
// fit cpuset parsing order in systemd
|
||||
for l, r := 0, len(ret)-1; l < r; l, r = l+1, r-1 {
|
||||
ret[l], ret[r] = ret[r], ret[l]
|
||||
}
|
||||
return ret, nil
|
||||
}
|
102
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
102
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
@ -0,0 +1,102 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
)
|
||||
|
||||
var (
|
||||
dbusC *systemdDbus.Conn
|
||||
dbusMu sync.RWMutex
|
||||
dbusInited bool
|
||||
dbusRootless bool
|
||||
)
|
||||
|
||||
type dbusConnManager struct{}
|
||||
|
||||
// newDbusConnManager initializes systemd dbus connection manager.
|
||||
func newDbusConnManager(rootless bool) *dbusConnManager {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusInited && rootless != dbusRootless {
|
||||
panic("can't have both root and rootless dbus")
|
||||
}
|
||||
dbusInited = true
|
||||
dbusRootless = rootless
|
||||
return &dbusConnManager{}
|
||||
}
|
||||
|
||||
// getConnection lazily initializes and returns systemd dbus connection.
|
||||
func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
|
||||
// In the case where dbusC != nil
|
||||
// Use the read lock the first time to ensure
|
||||
// that Conn can be acquired at the same time.
|
||||
dbusMu.RLock()
|
||||
if conn := dbusC; conn != nil {
|
||||
dbusMu.RUnlock()
|
||||
return conn, nil
|
||||
}
|
||||
dbusMu.RUnlock()
|
||||
|
||||
// In the case where dbusC == nil
|
||||
// Use write lock to ensure that only one
|
||||
// will be created
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if conn := dbusC; conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
conn, err := d.newConnection()
|
||||
if err != nil {
|
||||
// When dbus-user-session is not installed, we can't detect whether we should try to connect to user dbus or system dbus, so d.dbusRootless is set to false.
|
||||
// This may fail with a cryptic error "read unix @->/run/systemd/private: read: connection reset by peer: unknown."
|
||||
// https://github.com/moby/moby/issues/42793
|
||||
return nil, fmt.Errorf("failed to connect to dbus (hint: for rootless containers, maybe you need to install dbus-user-session package, see https://github.com/opencontainers/runc/blob/master/docs/cgroup-v2.md): %w", err)
|
||||
}
|
||||
dbusC = conn
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
|
||||
if dbusRootless {
|
||||
return newUserSystemdDbus()
|
||||
}
|
||||
return systemdDbus.NewWithContext(context.TODO())
|
||||
}
|
||||
|
||||
// resetConnection resets the connection to its initial state
|
||||
// (so it can be reconnected if necessary).
|
||||
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusC != nil && dbusC == conn {
|
||||
dbusC.Close()
|
||||
dbusC = nil
|
||||
}
|
||||
}
|
||||
|
||||
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
|
||||
// connection, the connection is re-established and the op is retried. This helps
|
||||
// with the situation when dbus is restarted and we have a stale connection.
|
||||
func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error {
|
||||
for {
|
||||
conn, err := d.getConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = op(conn)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !errors.Is(err, dbus.ErrClosed) {
|
||||
return err
|
||||
}
|
||||
d.resetConnection(conn)
|
||||
}
|
||||
}
|
74
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/devices.go
generated
vendored
Normal file
74
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/devices.go
generated
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// freezeBeforeSet answers whether there is a need to freeze the cgroup before
|
||||
// applying its systemd unit properties, and thaw after, while avoiding
|
||||
// unnecessary freezer state changes.
|
||||
//
|
||||
// The reason why we have to freeze is that systemd's application of device
|
||||
// rules is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we have to freeze the container to avoid the container get
|
||||
// an occasional "permission denied" error.
|
||||
func (m *LegacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
|
||||
// Special case for SkipDevices, as used by Kubernetes to create pod
|
||||
// cgroups with allow-all device policy).
|
||||
if r.SkipDevices {
|
||||
if r.SkipFreezeOnSet {
|
||||
// Both needsFreeze and needsThaw are false.
|
||||
return
|
||||
}
|
||||
|
||||
// No need to freeze if SkipDevices is set, and either
|
||||
// (1) systemd unit does not (yet) exist, or
|
||||
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
|
||||
//
|
||||
// Interestingly, (1) and (2) are the same here because
|
||||
// a non-existent unit returns default properties,
|
||||
// and settings in (2) are the defaults.
|
||||
//
|
||||
// Do not return errors from getUnitTypeProperty, as they alone
|
||||
// should not prevent Set from working.
|
||||
|
||||
unitType := getUnitType(unitName)
|
||||
|
||||
devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy")
|
||||
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
|
||||
devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow")
|
||||
if e == nil {
|
||||
if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 {
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
needsFreeze = true
|
||||
needsThaw = true
|
||||
|
||||
// Check the current freezer state.
|
||||
freezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if freezerState == configs.Frozen {
|
||||
// Already frozen, and should stay frozen.
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
}
|
||||
|
||||
if r.Freezer == configs.Frozen {
|
||||
// Will be frozen anyway -- no need to thaw.
|
||||
needsThaw = false
|
||||
}
|
||||
return
|
||||
}
|
93
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
93
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/moby/sys/userns"
|
||||
)
|
||||
|
||||
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uid, err := DetectUID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
}
|
||||
|
||||
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !userns.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "OwnerUID=") {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return -1, errors.New("could not detect the OwnerUID")
|
||||
}
|
||||
|
||||
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS, if set.
|
||||
// Otherwise it returns "unix:path=$XDG_RUNTIME_DIR/bus", if $XDG_RUNTIME_DIR/bus exists.
|
||||
func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
|
||||
return env, nil
|
||||
}
|
||||
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
|
||||
busPath := filepath.Join(xdr, "bus")
|
||||
if _, err := os.Stat(busPath); err == nil {
|
||||
busAddress := "unix:path=" + dbus.EscapeBusAddressValue(busPath)
|
||||
return busAddress, nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from the environment; make sure you have installed the dbus-user-session or dbus-daemon package; note you may need to re-login")
|
||||
}
|
413
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
413
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
@ -0,0 +1,413 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type LegacyManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (*LegacyManager, error) {
|
||||
if cg.Rootless {
|
||||
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
|
||||
}
|
||||
if cg.Resources != nil && cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &LegacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
dbus: newDbusConnManager(false),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set sets cgroup resource limits.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
var legacySubsystems = []subsystem{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
&fs.RdmaGroup{},
|
||||
&fs.NameGroup{GroupName: "misc"},
|
||||
}
|
||||
|
||||
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
deviceProperties, err := generateDeviceProperties(r, cm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(r.Memory)))
|
||||
}
|
||||
|
||||
if r.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||
}
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// initPaths figures out and returns paths to cgroups.
|
||||
func initPaths(c *configs.Cgroup) (map[string]string, error) {
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
unit := getUnitName(c)
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if s.Name() == "devices" {
|
||||
return nil, err
|
||||
}
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
|
||||
// If systemd is using cgroups-hybrid mode then add the slice path of
|
||||
// this container to the paths so the following process executed with
|
||||
// "runc exec" joins that cgroup as well.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
// "" means cgroup-hybrid path
|
||||
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
|
||||
if err != nil && cgroups.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
paths[""] = cgroupsHybridPath
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.joinCgroups(pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
|
||||
|
||||
// Both on success and on error, cleanup all the cgroups
|
||||
// we are aware of, as some of them were created directly
|
||||
// by Apply() and are not managed by systemd.
|
||||
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return stopErr
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *LegacyManager) joinCgroups(pid int) error {
|
||||
for _, sys := range legacySubsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
// let systemd handle this
|
||||
case "cpuset":
|
||||
if path, ok := m.paths[name]; ok {
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
if path, ok := m.paths[name]; ok {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, slice, unit), nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
err := m.doFreeze(state)
|
||||
if err == nil {
|
||||
m.cgroups.Resources.Freezer = state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// doFreeze is the same as Freeze but without
|
||||
// changing the m.cgroups.Resources.Frozen field.
|
||||
func (m *LegacyManager) doFreeze(state configs.FreezerState) error {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
resources := &configs.Resources{Freezer: state}
|
||||
return freezer.Set(path, resources)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range legacySubsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if needsFreeze {
|
||||
if err := m.doFreeze(configs.Frozen); err != nil {
|
||||
// If freezer cgroup isn't supported, we just warn about it.
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
// skip update the cgroup while frozen failed. #3803
|
||||
if !errors.Is(err, errSubsystemDoesNotExist) {
|
||||
if needsThaw {
|
||||
if thawErr := m.doFreeze(configs.Thawed); thawErr != nil {
|
||||
logrus.Infof("thaw container after doFreeze failed: %v", thawErr)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
setErr := setUnitProperties(m.dbus, unitName, properties...)
|
||||
if needsThaw {
|
||||
if err := m.doFreeze(configs.Thawed); err != nil {
|
||||
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
|
||||
}
|
||||
}
|
||||
if setErr != nil {
|
||||
return setErr
|
||||
}
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, ok := m.paths[sys.Name()]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
return freezer.GetState(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *LegacyManager) OOMKillCount() (uint64, error) {
|
||||
return fs.OOMKillCount(m.Path("memory"))
|
||||
}
|
516
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
516
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
@ -0,0 +1,516 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cpuIdleSupportedVersion = 252
|
||||
)
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
dbus *dbusConnManager
|
||||
fsMgr cgroups.Manager
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string) (*UnifiedManager, error) {
|
||||
m := &UnifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
dbus: newDbusConnManager(config.Rootless),
|
||||
}
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fsMgr, err := fs2.NewManager(config, m.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.fsMgr = fsMgr
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func shouldSetCPUIdle(cm *dbusConnManager, v string) bool {
|
||||
// The only valid values for cpu.idle are 0 and 1. As it is
|
||||
// not possible to directly set cpu.idle to 0 via systemd,
|
||||
// ignore 0. Ignore other values as we'll error out later
|
||||
// in Set() while calling fsMgr.Set().
|
||||
return v == "1" && systemdVersion(cm) >= cpuIdleSupportedVersion
|
||||
}
|
||||
|
||||
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
|
||||
// key/value map (where key is cgroupfs file name) to systemd unit properties.
|
||||
// This is on a best-effort basis, so the properties that are not known
|
||||
// (to this function and/or systemd) are ignored (but logged with "debug"
|
||||
// log level).
|
||||
//
|
||||
// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
//
|
||||
// For the list of systemd unit properties, see systemd.resource-control(5).
|
||||
func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||
var err error
|
||||
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if strings.IndexByte(k, '.') <= 0 {
|
||||
return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
// Kernel is quite forgiving to extra whitespace
|
||||
// around the value, and so should we.
|
||||
v = strings.TrimSpace(v)
|
||||
// Please keep cases in alphabetical order.
|
||||
switch k {
|
||||
case "cpu.idle":
|
||||
if shouldSetCPUIdle(cm, v) {
|
||||
// Setting CPUWeight to 0 tells systemd
|
||||
// to set cpu.idle to 1.
|
||||
props = append(props,
|
||||
newProp("CPUWeight", uint64(0)))
|
||||
}
|
||||
|
||||
case "cpu.max":
|
||||
// value: quota [period]
|
||||
quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set
|
||||
period := defCPUQuotaPeriod
|
||||
sv := strings.Fields(v)
|
||||
if len(sv) < 1 || len(sv) > 2 {
|
||||
return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v)
|
||||
}
|
||||
// quota
|
||||
if sv[0] != "max" {
|
||||
quota, err = strconv.ParseInt(sv[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
// period
|
||||
if len(sv) == 2 {
|
||||
period, err = strconv.ParseUint(sv[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
addCpuQuota(cm, &props, quota, period)
|
||||
|
||||
case "cpu.weight":
|
||||
if shouldSetCPUIdle(cm, strings.TrimSpace(res["cpu.idle"])) {
|
||||
// Do not add duplicate CPUWeight property
|
||||
// (see case "cpu.idle" above).
|
||||
logrus.Warn("unable to apply both cpu.weight and cpu.idle to systemd, ignoring cpu.weight")
|
||||
continue
|
||||
}
|
||||
num, err := strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
props = append(props,
|
||||
newProp("CPUWeight", num))
|
||||
|
||||
case "cpuset.cpus", "cpuset.mems":
|
||||
bits, err := RangeToBits(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
|
||||
}
|
||||
m := map[string]string{
|
||||
"cpuset.cpus": "AllowedCPUs",
|
||||
"cpuset.mems": "AllowedMemoryNodes",
|
||||
}
|
||||
// systemd only supports these properties since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 244 {
|
||||
props = append(props,
|
||||
newProp(m[k], bits))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support %s"+
|
||||
" (setting will still be applied to cgroupfs)",
|
||||
sdVer, m[k])
|
||||
}
|
||||
|
||||
case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"memory.high": "MemoryHigh",
|
||||
"memory.low": "MemoryLow",
|
||||
"memory.min": "MemoryMin",
|
||||
"memory.max": "MemoryMax",
|
||||
"memory.swap.max": "MemorySwapMax",
|
||||
}
|
||||
props = append(props,
|
||||
newProp(m[k], num))
|
||||
|
||||
case "pids.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
var err error
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
props = append(props,
|
||||
newProp("TasksMax", num))
|
||||
|
||||
case "memory.oom.group":
|
||||
// Setting this to 1 is roughly equivalent to OOMPolicy=kill
|
||||
// (as per systemd.service(5) and
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html),
|
||||
// but it's not clear what to do if it is unset or set
|
||||
// to 0 in runc update, as there are two other possible
|
||||
// values for OOMPolicy (continue/stop).
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
// Ignore the unknown resource here -- will still be
|
||||
// applied in Set which calls fs2.Set.
|
||||
logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v)
|
||||
}
|
||||
}
|
||||
|
||||
return props, nil
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
// We need this check before setting systemd properties, otherwise
|
||||
// the container is OOM-killed and the systemd unit is removed
|
||||
// before we get to fsMgr.Set().
|
||||
if err := fs2.CheckMemoryUsage(dirPath, r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(r, cm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(r.Memory)))
|
||||
}
|
||||
if r.MemoryReservation != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if swap != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemorySwapMax", uint64(swap)))
|
||||
}
|
||||
|
||||
idleSet := false
|
||||
// The logic here is the same as in shouldSetCPUIdle.
|
||||
if r.CPUIdle != nil && *r.CPUIdle == 1 && systemdVersion(cm) >= cpuIdleSupportedVersion {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", uint64(0)))
|
||||
idleSet = true
|
||||
}
|
||||
if r.CpuWeight != 0 {
|
||||
if idleSet {
|
||||
// Ignore CpuWeight if CPUIdle is already set.
|
||||
logrus.Warn("unable to apply both CPUWeight and CpuIdle to systemd, ignoring CPUWeight")
|
||||
} else {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ignore r.KernelMemory
|
||||
|
||||
// convert Resources.Unified map to systemd properties
|
||||
if r.Unified != nil {
|
||||
unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, unifiedProps...)
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
slice := "system.slice"
|
||||
if m.cgroups.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
|
||||
}
|
||||
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.OwnerUID != nil {
|
||||
// The directory itself must be chowned.
|
||||
err := os.Chown(m.path, *c.OwnerUID, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
filesToChown, err := cgroupFilesToChown()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, v := range filesToChown {
|
||||
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
|
||||
// Some files might not be present.
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// The kernel exposes a list of files that should be chowned to the delegate
|
||||
// uid in /sys/kernel/cgroup/delegate. If the file is not present
|
||||
// (Linux < 4.15), use the initial values mentioned in cgroups(7).
|
||||
func cgroupFilesToChown() ([]string, error) {
|
||||
const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"
|
||||
|
||||
f, err := os.Open(cgroupDelegateFile)
|
||||
if err != nil {
|
||||
return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
filesToChown := []string{}
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
filesToChown = append(filesToChown, scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
|
||||
}
|
||||
|
||||
return filesToChown, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(m.dbus, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// systemd 239 do not remove sub-cgroups.
|
||||
err := m.fsMgr.Destroy()
|
||||
// fsMgr.Destroy has handled ErrNotExist
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Path(_ string) string {
|
||||
return m.path
|
||||
}
|
||||
|
||||
// getSliceFull value is used in initPath.
|
||||
// The value is incompatible with systemdDbus.PropSlice.
|
||||
func (m *UnifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if c.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if c.Rootless {
|
||||
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
slice = filepath.Join(managerCG, slice)
|
||||
}
|
||||
|
||||
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) initPath() error {
|
||||
if m.path != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
sliceFull, err := m.getSliceFull()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c := m.cgroups
|
||||
path := filepath.Join(sliceFull, getUnitName(c))
|
||||
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// an example of the final path in rootless:
|
||||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||
m.path = path
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
return m.fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
return m.fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
return fmt.Errorf("unable to set unit properties: %w", err)
|
||||
}
|
||||
|
||||
return m.fsMgr.Set(r)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.path
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return m.fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) OOMKillCount() (uint64, error) {
|
||||
return m.fsMgr.OOMKillCount()
|
||||
}
|
456
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
456
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
@ -0,0 +1,456 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/moby/sys/userns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
hybridMountpoint = "/sys/fs/cgroup/unified"
|
||||
)
|
||||
|
||||
var (
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
isHybridOnce sync.Once
|
||||
isHybrid bool
|
||||
)
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(unifiedMountpoint, &st)
|
||||
if err != nil {
|
||||
level := logrus.WarnLevel
|
||||
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
||||
// For rootless containers, sweep it under the rug.
|
||||
level = logrus.DebugLevel
|
||||
}
|
||||
logrus.StandardLogger().Logf(level,
|
||||
"statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err)
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
|
||||
func IsCgroup2HybridMode() bool {
|
||||
isHybridOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(hybridMountpoint, &st)
|
||||
if err != nil {
|
||||
isHybrid = false
|
||||
if !os.IsNotExist(err) {
|
||||
// Report unexpected errors.
|
||||
logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
||||
}
|
||||
return
|
||||
}
|
||||
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isHybrid
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Mountpoint string
|
||||
Root string
|
||||
Subsystems []string
|
||||
}
|
||||
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
// This function should not be used from cgroupv2 code, as in this case
|
||||
// all the controllers are available under the constant unifiedMountpoint.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// TODO: remove cgroupv2 case once all external users are converted
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
return getCgroupMountsV1(all)
|
||||
}
|
||||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(data)...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
subsystems := []string{}
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
if len(parts) >= 4 && parts[3] != "0" {
|
||||
subsystems = append(subsystems, parts[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) (out []int, _ error) {
|
||||
file := CgroupProcesses
|
||||
retry := true
|
||||
|
||||
again:
|
||||
f, err := OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
if errors.Is(s.Err(), unix.ENOTSUP) && retry {
|
||||
// For a threaded cgroup, read returns ENOTSUP, and we should
|
||||
// read from cgroup.threads instead.
|
||||
file = "cgroup.threads"
|
||||
retry = false
|
||||
goto again
|
||||
}
|
||||
return out, s.Err()
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
|
||||
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
|
||||
//
|
||||
// "cpu": "/user.slice/user-1000.slice"
|
||||
// "pids": "/user.slice/user-1000.slice"
|
||||
//
|
||||
// etc.
|
||||
//
|
||||
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||
// cgroup paths, so the resulting map will have a single element where the key
|
||||
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// helper function for ParseCgroupFile to make testing easier
|
||||
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
cgroups := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
// from cgroups(7):
|
||||
// /proc/[pid]/cgroup
|
||||
// ...
|
||||
// For each cgroup hierarchy ... there is one entry
|
||||
// containing three colon-separated fields of the form:
|
||||
// hierarchy-ID:subsystem-list:cgroup-path
|
||||
parts := strings.SplitN(text, ":", 3)
|
||||
if len(parts) < 3 {
|
||||
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
|
||||
}
|
||||
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
cgroups[subs] = parts[2]
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
func PathExists(path string) bool {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// rmdir tries to remove a directory, optionally retrying on EBUSY.
|
||||
func rmdir(path string, retry bool) error {
|
||||
delay := time.Millisecond
|
||||
tries := 10
|
||||
|
||||
again:
|
||||
err := unix.Rmdir(path)
|
||||
switch err { // nolint:errorlint // unix errors are bare
|
||||
case nil, unix.ENOENT:
|
||||
return nil
|
||||
case unix.EINTR:
|
||||
goto again
|
||||
case unix.EBUSY:
|
||||
if retry && tries > 0 {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
tries--
|
||||
goto again
|
||||
|
||||
}
|
||||
}
|
||||
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||
}
|
||||
|
||||
// RemovePath aims to remove cgroup path. It does so recursively,
|
||||
// by removing any subdirectories (sub-cgroups) first.
|
||||
func RemovePath(path string) error {
|
||||
// Try the fast path first.
|
||||
if err := rmdir(path, false); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
infos, err := os.ReadDir(path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
for _, info := range infos {
|
||||
if info.IsDir() {
|
||||
// We should remove subcgroup first.
|
||||
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
err = rmdir(path, true)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemovePaths iterates over the provided paths removing them.
|
||||
func RemovePaths(paths map[string]string) (err error) {
|
||||
for s, p := range paths {
|
||||
if err := RemovePath(p); err == nil {
|
||||
delete(paths, s)
|
||||
}
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
clear(paths)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
var (
|
||||
hugePageSizes []string
|
||||
initHPSOnce sync.Once
|
||||
)
|
||||
|
||||
func HugePageSizes() []string {
|
||||
initHPSOnce.Do(func() {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
hugePageSizes, err = getHugePageSizeFromFilenames(files)
|
||||
if err != nil {
|
||||
logrus.Warn("HugePageSizes: ", err)
|
||||
}
|
||||
})
|
||||
|
||||
return hugePageSizes
|
||||
}
|
||||
|
||||
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
pageSizes := make([]string, 0, len(fileNames))
|
||||
var warn error
|
||||
|
||||
for _, file := range fileNames {
|
||||
// example: hugepages-1048576kB
|
||||
val := strings.TrimPrefix(file, "hugepages-")
|
||||
if len(val) == len(file) {
|
||||
// Unexpected file name: no prefix found, ignore it.
|
||||
continue
|
||||
}
|
||||
// The suffix is always "kB" (as of Linux 5.13). If we find
|
||||
// something else, produce an error but keep going.
|
||||
eLen := len(val) - 2
|
||||
val = strings.TrimSuffix(val, "kB")
|
||||
if len(val) != eLen {
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = errors.New(file + `: invalid suffix (expected "kB")`)
|
||||
}
|
||||
continue
|
||||
}
|
||||
size, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = fmt.Errorf("%s: %w", file, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
|
||||
// but in our case the size is in KB already.
|
||||
if size >= (1 << 20) {
|
||||
val = strconv.Itoa(size>>20) + "GB"
|
||||
} else if size >= (1 << 10) {
|
||||
val = strconv.Itoa(size>>10) + "MB"
|
||||
} else {
|
||||
val += "KB"
|
||||
}
|
||||
pageSizes = append(pageSizes, val)
|
||||
}
|
||||
|
||||
return pageSizes, warn
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(dir string) ([]int, error) {
|
||||
return readProcsFile(dir)
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
func WriteCgroupProc(dir string, pid int) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_, err = file.WriteString(strconv.Itoa(pid))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
|
||||
// there is need to convert from the cgroup v1 configuration to cgroup v2
|
||||
// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142)
|
||||
// convert from [2-262144] to [1-10000]
|
||||
// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)"
|
||||
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
|
||||
if cpuShares == 0 {
|
||||
return 0
|
||||
}
|
||||
return (1 + ((cpuShares-2)*9999)/262142)
|
||||
}
|
||||
|
||||
// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec
|
||||
// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap
|
||||
// is defined as memory+swap combined, while in cgroup v2 swap is a separate value,
|
||||
// so we need to subtract memory from it where it makes sense.
|
||||
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
|
||||
switch {
|
||||
case memory == -1 && memorySwap == 0:
|
||||
// For compatibility with cgroup1 controller, set swap to unlimited in
|
||||
// case the memory is set to unlimited and the swap is not explicitly set,
|
||||
// treating the request as "set both memory and swap to unlimited".
|
||||
return -1, nil
|
||||
case memorySwap == -1, memorySwap == 0:
|
||||
// Treat -1 ("max") and 0 ("unset") swap as is.
|
||||
return memorySwap, nil
|
||||
case memory == -1:
|
||||
// Unlimited memory, so treat swap as is.
|
||||
return memorySwap, nil
|
||||
case memory == 0:
|
||||
// Unset or unknown memory, can't calculate swap.
|
||||
return 0, errors.New("unable to set swap limit without memory limit")
|
||||
case memory < 0:
|
||||
// Does not make sense to subtract a negative value.
|
||||
return 0, fmt.Errorf("invalid memory value: %d", memory)
|
||||
case memorySwap < memory:
|
||||
// Sanity check.
|
||||
return 0, errors.New("memory+swap limit should be >= memory limit")
|
||||
}
|
||||
|
||||
return memorySwap - memory, nil
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
|
||||
// there is need to convert from the cgroup v1 configuration to cgroup v2
|
||||
// the formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990)
|
||||
// convert linearly from [10-1000] to [1-10000]
|
||||
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
|
||||
if blkIoWeight == 0 {
|
||||
return 0
|
||||
}
|
||||
return 1 + (uint64(blkIoWeight)-10)*9999/990
|
||||
}
|
277
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
277
e2e/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
@ -0,0 +1,277 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Code in this source file are specific to cgroup v1,
|
||||
// and must not be used from any cgroup v2 code.
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
defaultPrefix = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
||||
ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
|
||||
|
||||
readMountinfoOnce sync.Once
|
||||
readMountinfoErr error
|
||||
cgroupMountinfo []*mountinfo.Info
|
||||
)
|
||||
|
||||
type NotFoundError struct {
|
||||
Subsystem string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
||||
}
|
||||
|
||||
func NewNotFoundError(sub string) error {
|
||||
return &NotFoundError{
|
||||
Subsystem: sub,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
var nfErr *NotFoundError
|
||||
return errors.As(err, &nfErr)
|
||||
}
|
||||
|
||||
func tryDefaultPath(cgroupPath, subsystem string) string {
|
||||
if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// remove possible prefix
|
||||
subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix)
|
||||
|
||||
// Make sure we're still under defaultPrefix, and resolve
|
||||
// a possible symlink (like cpu -> cpu,cpuacct).
|
||||
path, err := securejoin.SecureJoin(defaultPrefix, subsystem)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (1) path should be a directory.
|
||||
st, err := os.Lstat(path)
|
||||
if err != nil || !st.IsDir() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) path should be a mount point.
|
||||
pst, err := os.Lstat(filepath.Dir(path))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
||||
// parent dir has the same dev -- path is not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) path should have 'cgroup' fs type.
|
||||
fst := unix.Statfs_t{}
|
||||
err = unix.Statfs(path, &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones
|
||||
// with fstype of "cgroup") for the current running process.
|
||||
//
|
||||
// The results are cached (to avoid re-reading mountinfo which is relatively
|
||||
// expensive), so it is assumed that cgroup mounts are not being changed.
|
||||
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
||||
readMountinfoOnce.Do(func() {
|
||||
// mountinfo.GetMounts uses /proc/thread-self, so we can use it without
|
||||
// issues.
|
||||
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
|
||||
mountinfo.FSTypeFilter("cgroup"),
|
||||
)
|
||||
})
|
||||
return cgroupMountinfo, readMountinfoErr
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
// Avoid parsing mountinfo by trying the default path first, if possible.
|
||||
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", "", errUnified
|
||||
}
|
||||
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
||||
for _, mi := range mounts {
|
||||
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
if opt == subsystem {
|
||||
return mi.Mountpoint, mi.Root, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", errors.New("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
numFound := 0
|
||||
for _, mi := range mounts {
|
||||
m := Mount{
|
||||
Mountpoint: mi.Mountpoint,
|
||||
Root: mi.Root,
|
||||
}
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
if !all && numFound >= len(ss) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func getCgroupMountsV1(all bool) ([]Mount, error) {
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
|
||||
return getCgroupMountsHelper(allMap, mi, all)
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// We don't need to use /proc/thread-self here because runc always runs
|
||||
// with every thread in the same cgroup. This lets us avoid having to do
|
||||
// runtime.LockOSThread.
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
66
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
Normal file
66
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// BlockIODevice holds major:minor format supported in blkio cgroup.
|
||||
type BlockIODevice struct {
|
||||
// Major is the device's major number
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
|
||||
type WeightDevice struct {
|
||||
BlockIODevice
|
||||
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
||||
Weight uint16 `json:"weight"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
LeafWeight uint16 `json:"leafWeight"`
|
||||
}
|
||||
|
||||
// NewWeightDevice returns a configured WeightDevice pointer
|
||||
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
|
||||
wd := &WeightDevice{}
|
||||
wd.Major = major
|
||||
wd.Minor = minor
|
||||
wd.Weight = weight
|
||||
wd.LeafWeight = leafWeight
|
||||
return wd
|
||||
}
|
||||
|
||||
// WeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) WeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
|
||||
}
|
||||
|
||||
// LeafWeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) LeafWeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
|
||||
}
|
||||
|
||||
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type ThrottleDevice struct {
|
||||
BlockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate uint64 `json:"rate"`
|
||||
}
|
||||
|
||||
// NewThrottleDevice returns a configured ThrottleDevice pointer
|
||||
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
||||
td := &ThrottleDevice{}
|
||||
td.Major = major
|
||||
td.Minor = minor
|
||||
td.Rate = rate
|
||||
return td
|
||||
}
|
||||
|
||||
// String formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) String() string {
|
||||
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
||||
}
|
||||
|
||||
// StringName formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) StringName(name string) string {
|
||||
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
||||
}
|
169
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
Normal file
169
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
Normal file
@ -0,0 +1,169 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
type FreezerState string
|
||||
|
||||
const (
|
||||
Undefined FreezerState = ""
|
||||
Frozen FreezerState = "FROZEN"
|
||||
Thawed FreezerState = "THAWED"
|
||||
)
|
||||
|
||||
// Cgroup holds properties of a cgroup on Linux.
|
||||
type Cgroup struct {
|
||||
// Name specifies the name of the cgroup
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// Parent specifies the name of parent of cgroup or slice
|
||||
Parent string `json:"parent,omitempty"`
|
||||
|
||||
// Path specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||
Path string `json:"path"`
|
||||
|
||||
// ScopePrefix describes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
|
||||
// Systemd tells if systemd should be used to manage cgroups.
|
||||
Systemd bool
|
||||
|
||||
// SystemdProps are any additional properties for systemd,
|
||||
// derived from org.systemd.property.xxx annotations.
|
||||
// Ignored unless systemd is used for managing cgroups.
|
||||
SystemdProps []systemdDbus.Property `json:"-"`
|
||||
|
||||
// Rootless tells if rootless cgroups should be used.
|
||||
Rootless bool
|
||||
|
||||
// The host UID that should own the cgroup, or nil to accept
|
||||
// the default ownership. This should only be set when the
|
||||
// cgroupfs is to be mounted read/write.
|
||||
// Not all cgroup manager implementations support changing
|
||||
// the ownership.
|
||||
OwnerUID *int `json:"owner_uid,omitempty"`
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
// Devices is the set of access rules for devices in the container.
|
||||
Devices []*devices.Rule `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
|
||||
// Memory reservation or soft_limit (in bytes)
|
||||
MemoryReservation int64 `json:"memory_reservation"`
|
||||
|
||||
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||
MemorySwap int64 `json:"memory_swap"`
|
||||
|
||||
// CPU shares (relative weight vs. other containers)
|
||||
CpuShares uint64 `json:"cpu_shares"`
|
||||
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
CpuQuota int64 `json:"cpu_quota"`
|
||||
|
||||
// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period.
|
||||
CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive
|
||||
|
||||
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
|
||||
CpuPeriod uint64 `json:"cpu_period"`
|
||||
|
||||
// How many time CPU will use in realtime scheduling (in usecs).
|
||||
CpuRtRuntime int64 `json:"cpu_rt_quota"`
|
||||
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
CpuRtPeriod uint64 `json:"cpu_rt_period"`
|
||||
|
||||
// CPU to use
|
||||
CpusetCpus string `json:"cpuset_cpus"`
|
||||
|
||||
// MEM to use
|
||||
CpusetMems string `json:"cpuset_mems"`
|
||||
|
||||
// cgroup SCHED_IDLE
|
||||
CPUIdle *int64 `json:"cpu_idle,omitempty"`
|
||||
|
||||
// Process limit; set <= `0' to disable limit.
|
||||
PidsLimit int64 `json:"pids_limit"`
|
||||
|
||||
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||
BlkioWeight uint16 `json:"blkio_weight"`
|
||||
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
|
||||
|
||||
// Weight per cgroup per device, can override BlkioWeight.
|
||||
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
|
||||
|
||||
// set the freeze value for the process
|
||||
Freezer FreezerState `json:"freezer"`
|
||||
|
||||
// Hugetlb limit (in bytes)
|
||||
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
|
||||
|
||||
// Whether to disable OOM Killer
|
||||
OomKillDisable bool `json:"oom_kill_disable"`
|
||||
|
||||
// Tuning swappiness behaviour per cgroup
|
||||
MemorySwappiness *uint64 `json:"memory_swappiness"`
|
||||
|
||||
// Set priority of network traffic for container
|
||||
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
|
||||
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
|
||||
// Rdma resource restriction configuration
|
||||
Rdma map[string]LinuxRdma `json:"rdma"`
|
||||
|
||||
// Used on cgroups v2:
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
CpuWeight uint64 `json:"cpu_weight"`
|
||||
|
||||
// Unified is cgroupv2-only key-value map.
|
||||
Unified map[string]string `json:"unified"`
|
||||
|
||||
// SkipDevices allows to skip configuring device permissions.
|
||||
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
|
||||
// common for many containers, and by runc update.
|
||||
//
|
||||
// NOTE it is impossible to start a container which has this flag set.
|
||||
SkipDevices bool `json:"-"`
|
||||
|
||||
// SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup
|
||||
// freeze when setting resources. Only applicable to systemd legacy
|
||||
// (i.e. cgroup v1) manager (which uses freeze by default to avoid
|
||||
// spurious permission errors caused by systemd inability to update
|
||||
// device rules in a non-disruptive manner).
|
||||
//
|
||||
// If not set, a few methods (such as looking into cgroup's
|
||||
// devices.list and querying the systemd unit properties) are used
|
||||
// during Set() to figure out whether the freeze is required. Those
|
||||
// methods may be relatively slow, thus this flag.
|
||||
SkipFreezeOnSet bool `json:"-"`
|
||||
|
||||
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
|
||||
// if the new memory limits (Memory and MemorySwap) being set are lower
|
||||
// than the current memory usage, and reject if so.
|
||||
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
|
||||
}
|
8
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
Normal file
8
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
//go:build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// Cgroup holds properties of a cgroup on Linux
|
||||
// TODO Windows: This can ultimately be entirely factored out on Windows as
|
||||
// cgroups are a Unix-specific construct.
|
||||
type Cgroup struct{}
|
508
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
Normal file
508
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
Normal file
@ -0,0 +1,508 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type Rlimit struct {
|
||||
Type int `json:"type"`
|
||||
Hard uint64 `json:"hard"`
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// IDMap represents UID/GID Mappings for User Namespaces.
|
||||
type IDMap struct {
|
||||
ContainerID int64 `json:"container_id"`
|
||||
HostID int64 `json:"host_id"`
|
||||
Size int64 `json:"size"`
|
||||
}
|
||||
|
||||
// Seccomp represents syscall restrictions
|
||||
// By default, only the native architecture of the kernel is allowed to be used
|
||||
// for syscalls. Additional architectures can be added by specifying them in
|
||||
// Architectures.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Flags []specs.LinuxSeccompFlag `json:"flags"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultErrnoRet *uint `json:"default_errno_ret"`
|
||||
ListenerPath string `json:"listener_path,omitempty"`
|
||||
ListenerMetadata string `json:"listener_metadata,omitempty"`
|
||||
}
|
||||
|
||||
// Action is taken upon rule match in Seccomp
|
||||
type Action int
|
||||
|
||||
const (
|
||||
Kill Action = iota + 1
|
||||
Errno
|
||||
Trap
|
||||
Allow
|
||||
Trace
|
||||
Log
|
||||
Notify
|
||||
KillThread
|
||||
KillProcess
|
||||
)
|
||||
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
type Operator int
|
||||
|
||||
const (
|
||||
EqualTo Operator = iota + 1
|
||||
NotEqualTo
|
||||
GreaterThan
|
||||
GreaterThanOrEqualTo
|
||||
LessThan
|
||||
LessThanOrEqualTo
|
||||
MaskEqualTo
|
||||
)
|
||||
|
||||
// Arg is a rule to match a specific syscall argument in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"value_two"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Syscall is a rule to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
ErrnoRet *uint `json:"errnoRet"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
||||
|
||||
// Config defines configuration options for executing a process inside a contained environment.
|
||||
type Config struct {
|
||||
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
|
||||
// This is a common option when the container is running in ramdisk
|
||||
NoPivotRoot bool `json:"no_pivot_root"`
|
||||
|
||||
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
|
||||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal"`
|
||||
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs"`
|
||||
|
||||
// Umask is the umask to use inside of the container.
|
||||
Umask *uint32 `json:"umask"`
|
||||
|
||||
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
|
||||
// bind mounts are writtable.
|
||||
Readonlyfs bool `json:"readonlyfs"`
|
||||
|
||||
// Specifies the mount propagation flags to be applied to /.
|
||||
RootPropagation int `json:"rootPropagation"`
|
||||
|
||||
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||
// rootfs and mount namespace if specified
|
||||
Mounts []*Mount `json:"mounts"`
|
||||
|
||||
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
|
||||
Devices []*devices.Device `json:"devices"`
|
||||
|
||||
MountLabel string `json:"mount_label"`
|
||||
|
||||
// Hostname optionally sets the container's hostname if provided
|
||||
Hostname string `json:"hostname"`
|
||||
|
||||
// Domainname optionally sets the container's domainname if provided
|
||||
Domainname string `json:"domainname"`
|
||||
|
||||
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
|
||||
// If a namespace is not provided that namespace is shared from the container's parent process
|
||||
Namespaces Namespaces `json:"namespaces"`
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *Capabilities `json:"capabilities"`
|
||||
|
||||
// Networks specifies the container's network setup to be created
|
||||
Networks []*Network `json:"networks"`
|
||||
|
||||
// Routes can be specified to create entries in the route table as the container is started
|
||||
Routes []*Route `json:"routes"`
|
||||
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||
// placed into to limit the resources the container has available
|
||||
Cgroups *Cgroup `json:"cgroups"`
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process running in the container and is
|
||||
// change at the time the process is execed
|
||||
AppArmorProfile string `json:"apparmor_profile,omitempty"`
|
||||
|
||||
// ProcessLabel specifies the label to apply to the process running in the container. It is
|
||||
// commonly used by selinux
|
||||
ProcessLabel string `json:"process_label,omitempty"`
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
|
||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||
// value.
|
||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||
|
||||
// UIDMappings is an array of User ID mappings for User Namespaces
|
||||
UIDMappings []IDMap `json:"uid_mappings"`
|
||||
|
||||
// GIDMappings is an array of Group ID mappings for User Namespaces
|
||||
GIDMappings []IDMap `json:"gid_mappings"`
|
||||
|
||||
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
|
||||
// mount pointing to /dev/null as to prevent reads of the file.
|
||||
MaskPaths []string `json:"mask_paths"`
|
||||
|
||||
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
|
||||
// so that these files prevent any writes.
|
||||
ReadonlyPaths []string `json:"readonly_paths"`
|
||||
|
||||
// Sysctl is a map of properties and their values. It is the equivalent of using
|
||||
// sysctl -w my.property.name value in Linux.
|
||||
Sysctl map[string]string `json:"sysctl"`
|
||||
|
||||
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||
// A number of rules are given, each having an action to be taken if a syscall matches it.
|
||||
// A default action to be taken if no rules match is also given.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
|
||||
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||
Hooks Hooks
|
||||
|
||||
// Version is the version of opencontainer specification that is supported.
|
||||
Version string `json:"version"`
|
||||
|
||||
// Labels are user defined metadata that is stored in the config and populated on the state
|
||||
Labels []string `json:"labels"`
|
||||
|
||||
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
|
||||
// When RootlessEUID is set, runc creates a new userns for the container.
|
||||
// (config.json needs to contain userns settings)
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
|
||||
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
|
||||
// TimeOffsets specifies the offset for supporting time namespaces.
|
||||
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`
|
||||
|
||||
// Scheduler represents the scheduling attributes for a process.
|
||||
Scheduler *Scheduler `json:"scheduler,omitempty"`
|
||||
|
||||
// Personality contains configuration for the Linux personality syscall.
|
||||
Personality *LinuxPersonality `json:"personality,omitempty"`
|
||||
|
||||
// IOPriority is the container's I/O priority.
|
||||
IOPriority *IOPriority `json:"io_priority,omitempty"`
|
||||
}
|
||||
|
||||
// Scheduler is based on the Linux sched_setattr(2) syscall.
|
||||
type Scheduler = specs.Scheduler
|
||||
|
||||
// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr.
|
||||
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
|
||||
var policy uint32
|
||||
switch scheduler.Policy {
|
||||
case specs.SchedOther:
|
||||
policy = 0
|
||||
case specs.SchedFIFO:
|
||||
policy = 1
|
||||
case specs.SchedRR:
|
||||
policy = 2
|
||||
case specs.SchedBatch:
|
||||
policy = 3
|
||||
case specs.SchedISO:
|
||||
policy = 4
|
||||
case specs.SchedIdle:
|
||||
policy = 5
|
||||
case specs.SchedDeadline:
|
||||
policy = 6
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
|
||||
}
|
||||
|
||||
var flags uint64
|
||||
for _, flag := range scheduler.Flags {
|
||||
switch flag {
|
||||
case specs.SchedFlagResetOnFork:
|
||||
flags |= 0x01
|
||||
case specs.SchedFlagReclaim:
|
||||
flags |= 0x02
|
||||
case specs.SchedFlagDLOverrun:
|
||||
flags |= 0x04
|
||||
case specs.SchedFlagKeepPolicy:
|
||||
flags |= 0x08
|
||||
case specs.SchedFlagKeepParams:
|
||||
flags |= 0x10
|
||||
case specs.SchedFlagUtilClampMin:
|
||||
flags |= 0x20
|
||||
case specs.SchedFlagUtilClampMax:
|
||||
flags |= 0x40
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
|
||||
}
|
||||
}
|
||||
|
||||
return &unix.SchedAttr{
|
||||
Size: unix.SizeofSchedAttr,
|
||||
Policy: policy,
|
||||
Flags: flags,
|
||||
Nice: scheduler.Nice,
|
||||
Priority: uint32(scheduler.Priority),
|
||||
Runtime: scheduler.Runtime,
|
||||
Deadline: scheduler.Deadline,
|
||||
Period: scheduler.Period,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var IOPrioClassMapping = map[specs.IOPriorityClass]int{
|
||||
specs.IOPRIO_CLASS_RT: 1,
|
||||
specs.IOPRIO_CLASS_BE: 2,
|
||||
specs.IOPRIO_CLASS_IDLE: 3,
|
||||
}
|
||||
|
||||
type IOPriority = specs.LinuxIOPriority
|
||||
|
||||
type (
|
||||
HookName string
|
||||
HookList []Hook
|
||||
Hooks map[HookName]HookList
|
||||
)
|
||||
|
||||
const (
|
||||
// Prestart commands are executed after the container namespaces are created,
|
||||
// but before the user supplied command is executed from init.
|
||||
// Note: This hook is now deprecated
|
||||
// Prestart commands are called in the Runtime namespace.
|
||||
Prestart HookName = "prestart"
|
||||
|
||||
// CreateRuntime commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateRuntime is called immediately after the deprecated Prestart hook.
|
||||
// CreateRuntime commands are called in the Runtime Namespace.
|
||||
CreateRuntime HookName = "createRuntime"
|
||||
|
||||
// CreateContainer commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateContainer commands are called in the Container namespace.
|
||||
CreateContainer HookName = "createContainer"
|
||||
|
||||
// StartContainer commands MUST be called as part of the start operation and before
|
||||
// the container process is started.
|
||||
// StartContainer commands are called in the Container namespace.
|
||||
StartContainer HookName = "startContainer"
|
||||
|
||||
// Poststart commands are executed after the container init process starts.
|
||||
// Poststart commands are called in the Runtime Namespace.
|
||||
Poststart HookName = "poststart"
|
||||
|
||||
// Poststop commands are executed after the container init process exits.
|
||||
// Poststop commands are called in the Runtime Namespace.
|
||||
Poststop HookName = "poststop"
|
||||
)
|
||||
|
||||
// KnownHookNames returns the known hook names.
|
||||
// Used by `runc features`.
|
||||
func KnownHookNames() []string {
|
||||
return []string{
|
||||
string(Prestart), // deprecated
|
||||
string(CreateRuntime),
|
||||
string(CreateContainer),
|
||||
string(StartContainer),
|
||||
string(Poststart),
|
||||
string(Poststop),
|
||||
}
|
||||
}
|
||||
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string
|
||||
// Effective is the set of capabilities checked by the kernel.
|
||||
Effective []string
|
||||
// Inheritable is the capabilities preserved across execve.
|
||||
Inheritable []string
|
||||
// Permitted is the limiting superset for effective capabilities.
|
||||
Permitted []string
|
||||
// Ambient is the ambient set of capabilities that are kept.
|
||||
Ambient []string
|
||||
}
|
||||
|
||||
// Deprecated: use (Hooks).Run instead.
|
||||
func (hooks HookList) RunHooks(state *specs.State) error {
|
||||
for i, h := range hooks {
|
||||
if err := h.Run(state); err != nil {
|
||||
return fmt.Errorf("error running hook #%d: %w", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state map[HookName][]CommandHook
|
||||
|
||||
if err := json.Unmarshal(b, &state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*hooks = Hooks{}
|
||||
for n, commandHooks := range state {
|
||||
if len(commandHooks) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
(*hooks)[n] = HookList{}
|
||||
for _, h := range commandHooks {
|
||||
(*hooks)[n] = append((*hooks)[n], h)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
||||
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||
for _, hook := range hooks {
|
||||
switch chook := hook.(type) {
|
||||
case CommandHook:
|
||||
serializableHooks = append(serializableHooks, chook)
|
||||
default:
|
||||
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
||||
}
|
||||
}
|
||||
|
||||
return serializableHooks
|
||||
}
|
||||
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"prestart": serialize((*hooks)[Prestart]),
|
||||
"createRuntime": serialize((*hooks)[CreateRuntime]),
|
||||
"createContainer": serialize((*hooks)[CreateContainer]),
|
||||
"startContainer": serialize((*hooks)[StartContainer]),
|
||||
"poststart": serialize((*hooks)[Poststart]),
|
||||
"poststop": serialize((*hooks)[Poststop]),
|
||||
})
|
||||
}
|
||||
|
||||
// Run executes all hooks for the given hook name.
|
||||
func (hooks Hooks) Run(name HookName, state *specs.State) error {
|
||||
list := hooks[name]
|
||||
for i, h := range list {
|
||||
if err := h.Run(state); err != nil {
|
||||
return fmt.Errorf("error running %s hook #%d: %w", name, i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type Hook interface {
|
||||
// Run executes the hook with the provided state.
|
||||
Run(*specs.State) error
|
||||
}
|
||||
|
||||
// NewFunctionHook will call the provided function when the hook is run.
|
||||
func NewFunctionHook(f func(*specs.State) error) FuncHook {
|
||||
return FuncHook{
|
||||
run: f,
|
||||
}
|
||||
}
|
||||
|
||||
type FuncHook struct {
|
||||
run func(*specs.State) error
|
||||
}
|
||||
|
||||
func (f FuncHook) Run(s *specs.State) error {
|
||||
return f.run(s)
|
||||
}
|
||||
|
||||
type Command struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Dir string `json:"dir"`
|
||||
Timeout *time.Duration `json:"timeout"`
|
||||
}
|
||||
|
||||
// NewCommandHook will execute the provided command when the hook is run.
|
||||
func NewCommandHook(cmd Command) CommandHook {
|
||||
return CommandHook{
|
||||
Command: cmd,
|
||||
}
|
||||
}
|
||||
|
||||
type CommandHook struct {
|
||||
Command
|
||||
}
|
||||
|
||||
func (c Command) Run(s *specs.State) error {
|
||||
b, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd := exec.Cmd{
|
||||
Path: c.Path,
|
||||
Args: c.Args,
|
||||
Env: c.Env,
|
||||
Stdin: bytes.NewReader(b),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
errC := make(chan error, 1)
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
errC <- err
|
||||
}()
|
||||
var timerCh <-chan time.Time
|
||||
if c.Timeout != nil {
|
||||
timer := time.NewTimer(*c.Timeout)
|
||||
defer timer.Stop()
|
||||
timerCh = timer.C
|
||||
}
|
||||
select {
|
||||
case err := <-errC:
|
||||
return err
|
||||
case <-timerCh:
|
||||
_ = cmd.Process.Kill()
|
||||
<-errC
|
||||
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||
}
|
||||
}
|
97
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
97
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
@ -0,0 +1,97 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
var (
|
||||
errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found")
|
||||
errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found")
|
||||
)
|
||||
|
||||
// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details.
|
||||
// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h
|
||||
const (
|
||||
PerLinux = 0x0000
|
||||
PerLinux32 = 0x0008
|
||||
)
|
||||
|
||||
type LinuxPersonality struct {
|
||||
// Domain for the personality
|
||||
// can only contain values "LINUX" and "LINUX32"
|
||||
Domain int `json:"domain"`
|
||||
}
|
||||
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if len(c.UIDMappings) == 0 {
|
||||
return -1, errNoUIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
|
||||
}
|
||||
// If we are a 32-bit binary running on a 64-bit system, it's possible
|
||||
// the mapped user is too large to store in an int, which means we
|
||||
// cannot do the mapping. We can't just return an int64, because
|
||||
// os.Setuid() takes an int.
|
||||
if id > math.MaxInt {
|
||||
return -1, fmt.Errorf("mapping for uid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
|
||||
}
|
||||
return int(id), nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootUID() (int, error) {
|
||||
return c.HostUID(0)
|
||||
}
|
||||
|
||||
// HostGID gets the translated gid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if len(c.GIDMappings) == 0 {
|
||||
return -1, errNoGIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
|
||||
}
|
||||
// If we are a 32-bit binary running on a 64-bit system, it's possible
|
||||
// the mapped user is too large to store in an int, which means we
|
||||
// cannot do the mapping. We can't just return an int64, because
|
||||
// os.Setgid() takes an int.
|
||||
if id > math.MaxInt {
|
||||
return -1, fmt.Errorf("mapping for gid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
|
||||
}
|
||||
return int(id), nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootGID() (int, error) {
|
||||
return c.HostGID(0)
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
// if that ID is present in the map.
|
||||
func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) {
|
||||
for _, m := range uMap {
|
||||
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||
hostID := m.HostID + (containerID - m.ContainerID)
|
||||
return hostID, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
//go:build gofuzz
|
||||
|
||||
package configs
|
||||
|
||||
func FuzzUnmarshalJSON(data []byte) int {
|
||||
hooks := Hooks{}
|
||||
_ = hooks.UnmarshalJSON(data)
|
||||
return 1
|
||||
}
|
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
Normal file
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
package configs
|
||||
|
||||
type HugepageLimit struct {
|
||||
// which type of hugepage to limit.
|
||||
Pagesize string `json:"page_size"`
|
||||
|
||||
// usage limit for hugepage.
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
16
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
16
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
package configs
|
||||
|
||||
type IntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// The unit of memory bandwidth is specified in "percentages" by
|
||||
// default, and in "MBps" if MBA Software Controller is enabled.
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
14
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
Normal file
14
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type IfPrioMap struct {
|
||||
Interface string `json:"interface"`
|
||||
Priority int64 `json:"priority"`
|
||||
}
|
||||
|
||||
func (i *IfPrioMap) CgroupString() string {
|
||||
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
|
||||
}
|
7
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
Normal file
7
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package configs
|
||||
|
||||
const (
|
||||
// EXT_COPYUP is a directive to copy up the contents of a directory when
|
||||
// a tmpfs is mounted over it.
|
||||
EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning
|
||||
)
|
66
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
generated
vendored
Normal file
66
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
generated
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
package configs
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
type MountIDMapping struct {
|
||||
// Recursive indicates if the mapping needs to be recursive.
|
||||
Recursive bool `json:"recursive"`
|
||||
|
||||
// UserNSPath is a path to a user namespace that indicates the necessary
|
||||
// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and
|
||||
// GIDMappings must be set to nil.
|
||||
UserNSPath string `json:"userns_path,omitempty"`
|
||||
|
||||
// UIDMappings is the uid mapping set for this mount, to be used with
|
||||
// MOUNT_ATTR_IDMAP.
|
||||
UIDMappings []IDMap `json:"uid_mappings,omitempty"`
|
||||
|
||||
// GIDMappings is the gid mapping set for this mount, to be used with
|
||||
// MOUNT_ATTR_IDMAP.
|
||||
GIDMappings []IDMap `json:"gid_mappings,omitempty"`
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
// Source path for the mount.
|
||||
Source string `json:"source"`
|
||||
|
||||
// Destination path for the mount inside the container.
|
||||
Destination string `json:"destination"`
|
||||
|
||||
// Device the mount is for.
|
||||
Device string `json:"device"`
|
||||
|
||||
// Mount flags.
|
||||
Flags int `json:"flags"`
|
||||
|
||||
// Mount flags that were explicitly cleared in the configuration (meaning
|
||||
// the user explicitly requested that these flags *not* be set).
|
||||
ClearedFlags int `json:"cleared_flags"`
|
||||
|
||||
// Propagation Flags
|
||||
PropagationFlags []int `json:"propagation_flags"`
|
||||
|
||||
// Mount data applied to the mount.
|
||||
Data string `json:"data"`
|
||||
|
||||
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||
Relabel string `json:"relabel"`
|
||||
|
||||
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
|
||||
RecAttr *unix.MountAttr `json:"rec_attr"`
|
||||
|
||||
// Extensions are additional flags that are specific to runc.
|
||||
Extensions int `json:"extensions"`
|
||||
|
||||
// Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil,
|
||||
// the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings.
|
||||
IDMapping *MountIDMapping `json:"id_mapping,omitempty"`
|
||||
}
|
||||
|
||||
func (m *Mount) IsBind() bool {
|
||||
return m.Flags&unix.MS_BIND != 0
|
||||
}
|
||||
|
||||
func (m *Mount) IsIDMapped() bool {
|
||||
return m.IDMapping != nil
|
||||
}
|
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
generated
vendored
Normal file
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
//go:build !linux
|
||||
|
||||
package configs
|
||||
|
||||
type Mount struct{}
|
||||
|
||||
func (m *Mount) IsBind() bool {
|
||||
return false
|
||||
}
|
5
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
Normal file
5
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
package configs
|
||||
|
||||
type NamespaceType string
|
||||
|
||||
type Namespaces []Namespace
|
133
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
Normal file
133
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
Normal file
@ -0,0 +1,133 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
NEWNET NamespaceType = "NEWNET"
|
||||
NEWPID NamespaceType = "NEWPID"
|
||||
NEWNS NamespaceType = "NEWNS"
|
||||
NEWUTS NamespaceType = "NEWUTS"
|
||||
NEWIPC NamespaceType = "NEWIPC"
|
||||
NEWUSER NamespaceType = "NEWUSER"
|
||||
NEWCGROUP NamespaceType = "NEWCGROUP"
|
||||
NEWTIME NamespaceType = "NEWTIME"
|
||||
)
|
||||
|
||||
var (
|
||||
nsLock sync.Mutex
|
||||
supportedNamespaces = make(map[NamespaceType]bool)
|
||||
)
|
||||
|
||||
// NsName converts the namespace type to its filename
|
||||
func NsName(ns NamespaceType) string {
|
||||
switch ns {
|
||||
case NEWNET:
|
||||
return "net"
|
||||
case NEWNS:
|
||||
return "mnt"
|
||||
case NEWPID:
|
||||
return "pid"
|
||||
case NEWIPC:
|
||||
return "ipc"
|
||||
case NEWUSER:
|
||||
return "user"
|
||||
case NEWUTS:
|
||||
return "uts"
|
||||
case NEWCGROUP:
|
||||
return "cgroup"
|
||||
case NEWTIME:
|
||||
return "time"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsNamespaceSupported returns whether a namespace is available or
|
||||
// not
|
||||
func IsNamespaceSupported(ns NamespaceType) bool {
|
||||
nsLock.Lock()
|
||||
defer nsLock.Unlock()
|
||||
supported, ok := supportedNamespaces[ns]
|
||||
if ok {
|
||||
return supported
|
||||
}
|
||||
nsFile := NsName(ns)
|
||||
// if the namespace type is unknown, just return false
|
||||
if nsFile == "" {
|
||||
return false
|
||||
}
|
||||
// We don't need to use /proc/thread-self here because the list of
|
||||
// namespace types is unrelated to the thread. This lets us avoid having to
|
||||
// do runtime.LockOSThread.
|
||||
_, err := os.Stat("/proc/self/ns/" + nsFile)
|
||||
// a namespace is supported if it exists and we have permissions to read it
|
||||
supported = err == nil
|
||||
supportedNamespaces[ns] = supported
|
||||
return supported
|
||||
}
|
||||
|
||||
func NamespaceTypes() []NamespaceType {
|
||||
return []NamespaceType{
|
||||
NEWUSER, // Keep user NS always first, don't move it.
|
||||
NEWIPC,
|
||||
NEWUTS,
|
||||
NEWNET,
|
||||
NEWPID,
|
||||
NEWNS,
|
||||
NEWCGROUP,
|
||||
NEWTIME,
|
||||
}
|
||||
}
|
||||
|
||||
// Namespace defines configuration for each namespace. It specifies an
|
||||
// alternate path that is able to be joined via setns.
|
||||
type Namespace struct {
|
||||
Type NamespaceType `json:"type"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
func (n *Namespace) GetPath(pid int) string {
|
||||
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
|
||||
}
|
||||
|
||||
func (n *Namespaces) Remove(t NamespaceType) bool {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
return false
|
||||
}
|
||||
*n = append((*n)[:i], (*n)[i+1:]...)
|
||||
return true
|
||||
}
|
||||
|
||||
func (n *Namespaces) Add(t NamespaceType, path string) {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
*n = append(*n, Namespace{Type: t, Path: path})
|
||||
return
|
||||
}
|
||||
(*n)[i].Path = path
|
||||
}
|
||||
|
||||
func (n *Namespaces) index(t NamespaceType) int {
|
||||
for i, ns := range *n {
|
||||
if ns.Type == t {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func (n *Namespaces) Contains(t NamespaceType) bool {
|
||||
return n.index(t) != -1
|
||||
}
|
||||
|
||||
func (n *Namespaces) PathOf(t NamespaceType) string {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
return ""
|
||||
}
|
||||
return (*n)[i].Path
|
||||
}
|
45
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
Normal file
45
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
//go:build linux
|
||||
|
||||
package configs
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
return namespaceInfo[n.Type]
|
||||
}
|
||||
|
||||
var namespaceInfo = map[NamespaceType]int{
|
||||
NEWNET: unix.CLONE_NEWNET,
|
||||
NEWNS: unix.CLONE_NEWNS,
|
||||
NEWUSER: unix.CLONE_NEWUSER,
|
||||
NEWIPC: unix.CLONE_NEWIPC,
|
||||
NEWUTS: unix.CLONE_NEWUTS,
|
||||
NEWPID: unix.CLONE_NEWPID,
|
||||
NEWCGROUP: unix.CLONE_NEWCGROUP,
|
||||
NEWTIME: unix.CLONE_NEWTIME,
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
var flag int
|
||||
for _, v := range *n {
|
||||
if v.Path != "" {
|
||||
continue
|
||||
}
|
||||
flag |= namespaceInfo[v.Type]
|
||||
}
|
||||
return uintptr(flag)
|
||||
}
|
||||
|
||||
// IsPrivate tells whether the namespace of type t is configured as private
|
||||
// (i.e. it exists and is not shared).
|
||||
func (n Namespaces) IsPrivate(t NamespaceType) bool {
|
||||
for _, v := range n {
|
||||
if v.Type == t {
|
||||
return v.Path == ""
|
||||
}
|
||||
}
|
||||
// Not found, so implicitly sharing a parent namespace.
|
||||
return false
|
||||
}
|
13
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
generated
vendored
Normal file
13
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
//go:build !linux && !windows
|
||||
|
||||
package configs
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
panic("No namespace syscall support")
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
panic("No namespace syscall support")
|
||||
}
|
7
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
Normal file
7
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
//go:build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// Namespace defines configuration for each namespace. It specifies an
|
||||
// alternate path that is able to be joined via setns.
|
||||
type Namespace struct{}
|
75
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
Normal file
75
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
package configs
|
||||
|
||||
// Network defines configuration for a container's networking stack
|
||||
//
|
||||
// The network configuration can be omitted from a container causing the
|
||||
// container to be setup with the host's networking stack
|
||||
type Network struct {
|
||||
// Type sets the networks type, commonly veth and loopback
|
||||
Type string `json:"type"`
|
||||
|
||||
// Name of the network interface
|
||||
Name string `json:"name"`
|
||||
|
||||
// The bridge to use.
|
||||
Bridge string `json:"bridge"`
|
||||
|
||||
// MacAddress contains the MAC address to set on the network interface
|
||||
MacAddress string `json:"mac_address"`
|
||||
|
||||
// Address contains the IPv4 and mask to set on the network interface
|
||||
Address string `json:"address"`
|
||||
|
||||
// Gateway sets the gateway address that is used as the default for the interface
|
||||
Gateway string `json:"gateway"`
|
||||
|
||||
// IPv6Address contains the IPv6 and mask to set on the network interface
|
||||
IPv6Address string `json:"ipv6_address"`
|
||||
|
||||
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
|
||||
IPv6Gateway string `json:"ipv6_gateway"`
|
||||
|
||||
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
|
||||
// container's interfaces if a pair is created, specifically in the case of type veth
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
Mtu int `json:"mtu"`
|
||||
|
||||
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
|
||||
// container's interfaces if a pair is created, specifically in the case of type veth
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
TxQueueLen int `json:"txqueuelen"`
|
||||
|
||||
// HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
|
||||
// container.
|
||||
HostInterfaceName string `json:"host_interface_name"`
|
||||
|
||||
// HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
|
||||
// bridge port in the case of type veth
|
||||
// Note: This is unsupported on some systems.
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
HairpinMode bool `json:"hairpin_mode"`
|
||||
}
|
||||
|
||||
// Route defines a routing table entry.
|
||||
//
|
||||
// Routes can be specified to create entries in the routing table as the container
|
||||
// is started.
|
||||
//
|
||||
// All of destination, source, and gateway should be either IPv4 or IPv6.
|
||||
// One of the three options must be present, and omitted entries will use their
|
||||
// IP family default for the route table. For IPv4 for example, setting the
|
||||
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
|
||||
// destination of 0.0.0.0(or *) when viewed in the route table.
|
||||
type Route struct {
|
||||
// Destination specifies the destination IP address and mask in the CIDR form.
|
||||
Destination string `json:"destination"`
|
||||
|
||||
// Source specifies the source IP address and mask in the CIDR form.
|
||||
Source string `json:"source"`
|
||||
|
||||
// Gateway specifies the gateway IP address.
|
||||
Gateway string `json:"gateway"`
|
||||
|
||||
// InterfaceName specifies the device to set this route up for, for example eth0.
|
||||
InterfaceName string `json:"interface_name"`
|
||||
}
|
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
9
e2e/vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
package configs
|
||||
|
||||
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
|
||||
type LinuxRdma struct {
|
||||
// Maximum number of HCA handles that can be opened. Default is "no limit".
|
||||
HcaHandles *uint32 `json:"hca_handles,omitempty"`
|
||||
// Maximum number of HCA objects that can be created. Default is "no limit".
|
||||
HcaObjects *uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
174
e2e/vendor/github.com/opencontainers/runc/libcontainer/devices/device.go
generated
vendored
Normal file
174
e2e/vendor/github.com/opencontainers/runc/libcontainer/devices/device.go
generated
vendored
Normal file
@ -0,0 +1,174 @@
|
||||
package devices
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
Wildcard = -1
|
||||
)
|
||||
|
||||
type Device struct {
|
||||
Rule
|
||||
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"file_mode"`
|
||||
|
||||
// Uid of the device.
|
||||
Uid uint32 `json:"uid"`
|
||||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid"`
|
||||
}
|
||||
|
||||
// Permissions is a cgroupv1-style string to represent device access. It
|
||||
// has to be a string for backward compatibility reasons, hence why it has
|
||||
// methods to do set operations.
|
||||
type Permissions string
|
||||
|
||||
const (
|
||||
deviceRead uint = (1 << iota)
|
||||
deviceWrite
|
||||
deviceMknod
|
||||
)
|
||||
|
||||
func (p Permissions) toSet() uint {
|
||||
var set uint
|
||||
for _, perm := range p {
|
||||
switch perm {
|
||||
case 'r':
|
||||
set |= deviceRead
|
||||
case 'w':
|
||||
set |= deviceWrite
|
||||
case 'm':
|
||||
set |= deviceMknod
|
||||
}
|
||||
}
|
||||
return set
|
||||
}
|
||||
|
||||
func fromSet(set uint) Permissions {
|
||||
var perm string
|
||||
if set&deviceRead == deviceRead {
|
||||
perm += "r"
|
||||
}
|
||||
if set&deviceWrite == deviceWrite {
|
||||
perm += "w"
|
||||
}
|
||||
if set&deviceMknod == deviceMknod {
|
||||
perm += "m"
|
||||
}
|
||||
return Permissions(perm)
|
||||
}
|
||||
|
||||
// Union returns the union of the two sets of Permissions.
|
||||
func (p Permissions) Union(o Permissions) Permissions {
|
||||
lhs := p.toSet()
|
||||
rhs := o.toSet()
|
||||
return fromSet(lhs | rhs)
|
||||
}
|
||||
|
||||
// Difference returns the set difference of the two sets of Permissions.
|
||||
// In set notation, A.Difference(B) gives you A\B.
|
||||
func (p Permissions) Difference(o Permissions) Permissions {
|
||||
lhs := p.toSet()
|
||||
rhs := o.toSet()
|
||||
return fromSet(lhs &^ rhs)
|
||||
}
|
||||
|
||||
// Intersection computes the intersection of the two sets of Permissions.
|
||||
func (p Permissions) Intersection(o Permissions) Permissions {
|
||||
lhs := p.toSet()
|
||||
rhs := o.toSet()
|
||||
return fromSet(lhs & rhs)
|
||||
}
|
||||
|
||||
// IsEmpty returns whether the set of permissions in a Permissions is
|
||||
// empty.
|
||||
func (p Permissions) IsEmpty() bool {
|
||||
return p == Permissions("")
|
||||
}
|
||||
|
||||
// IsValid returns whether the set of permissions is a subset of valid
|
||||
// permissions (namely, {r,w,m}).
|
||||
func (p Permissions) IsValid() bool {
|
||||
return p == fromSet(p.toSet())
|
||||
}
|
||||
|
||||
type Type rune
|
||||
|
||||
const (
|
||||
WildcardDevice Type = 'a'
|
||||
BlockDevice Type = 'b'
|
||||
CharDevice Type = 'c' // or 'u'
|
||||
FifoDevice Type = 'p'
|
||||
)
|
||||
|
||||
func (t Type) IsValid() bool {
|
||||
switch t {
|
||||
case WildcardDevice, BlockDevice, CharDevice, FifoDevice:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (t Type) CanMknod() bool {
|
||||
switch t {
|
||||
case BlockDevice, CharDevice, FifoDevice:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (t Type) CanCgroup() bool {
|
||||
switch t {
|
||||
case WildcardDevice, BlockDevice, CharDevice:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
type Rule struct {
|
||||
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
||||
// acts as a wildcard and all fields other than Allow are ignored.
|
||||
Type Type `json:"type"`
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Permissions is the set of permissions that this rule applies to (in the
|
||||
// cgroupv1 format -- any combination of "rwm").
|
||||
Permissions Permissions `json:"permissions"`
|
||||
|
||||
// Allow specifies whether this rule is allowed.
|
||||
Allow bool `json:"allow"`
|
||||
}
|
||||
|
||||
func (d *Rule) CgroupString() string {
|
||||
var (
|
||||
major = strconv.FormatInt(d.Major, 10)
|
||||
minor = strconv.FormatInt(d.Minor, 10)
|
||||
)
|
||||
if d.Major == Wildcard {
|
||||
major = "*"
|
||||
}
|
||||
if d.Minor == Wildcard {
|
||||
minor = "*"
|
||||
}
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
||||
}
|
||||
|
||||
func (d *Rule) Mkdev() (uint64, error) {
|
||||
return mkDev(d)
|
||||
}
|
119
e2e/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
Normal file
119
e2e/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go
generated
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
//go:build !windows
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// ErrNotADevice denotes that a file is not a valid linux device.
|
||||
var ErrNotADevice = errors.New("not a device node")
|
||||
|
||||
// Testing dependencies
|
||||
var (
|
||||
unixLstat = unix.Lstat
|
||||
osReadDir = os.ReadDir
|
||||
)
|
||||
|
||||
func mkDev(d *Rule) (uint64, error) {
|
||||
if d.Major == Wildcard || d.Minor == Wildcard {
|
||||
return 0, errors.New("cannot mkdev() device with wildcards")
|
||||
}
|
||||
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
|
||||
}
|
||||
|
||||
// DeviceFromPath takes the path to a device and its cgroup_permissions (which
|
||||
// cannot be easily queried) to look up the information about a linux device
|
||||
// and returns that information as a Device struct.
|
||||
func DeviceFromPath(path, permissions string) (*Device, error) {
|
||||
var stat unix.Stat_t
|
||||
err := unixLstat(path, &stat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
devType Type
|
||||
mode = stat.Mode
|
||||
devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS.
|
||||
major = unix.Major(devNumber)
|
||||
minor = unix.Minor(devNumber)
|
||||
)
|
||||
switch mode & unix.S_IFMT {
|
||||
case unix.S_IFBLK:
|
||||
devType = BlockDevice
|
||||
case unix.S_IFCHR:
|
||||
devType = CharDevice
|
||||
case unix.S_IFIFO:
|
||||
devType = FifoDevice
|
||||
default:
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
return &Device{
|
||||
Rule: Rule{
|
||||
Type: devType,
|
||||
Major: int64(major),
|
||||
Minor: int64(minor),
|
||||
Permissions: Permissions(permissions),
|
||||
},
|
||||
Path: path,
|
||||
FileMode: os.FileMode(mode &^ unix.S_IFMT),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HostDevices returns all devices that can be found under /dev directory.
|
||||
func HostDevices() ([]*Device, error) {
|
||||
return GetDevices("/dev")
|
||||
}
|
||||
|
||||
// GetDevices recursively traverses a directory specified by path
|
||||
// and returns all devices found there.
|
||||
func GetDevices(path string) ([]*Device, error) {
|
||||
files, err := osReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var out []*Device
|
||||
for _, f := range files {
|
||||
switch {
|
||||
case f.IsDir():
|
||||
switch f.Name() {
|
||||
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
||||
// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
|
||||
continue
|
||||
default:
|
||||
sub, err := GetDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out = append(out, sub...)
|
||||
continue
|
||||
}
|
||||
case f.Name() == "console":
|
||||
continue
|
||||
}
|
||||
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrNotADevice) {
|
||||
continue
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if device.Type == FifoDevice {
|
||||
continue
|
||||
}
|
||||
out = append(out, device)
|
||||
}
|
||||
return out, nil
|
||||
}
|
23
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go
generated
vendored
Normal file
23
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
package intelrdt
|
||||
|
||||
var cmtEnabled bool
|
||||
|
||||
// Check if Intel RDT/CMT is enabled.
|
||||
func IsCMTEnabled() bool {
|
||||
featuresInit()
|
||||
return cmtEnabled
|
||||
}
|
||||
|
||||
func getCMTNumaNodeStats(numaPath string) (*CMTNumaNodeStats, error) {
|
||||
stats := &CMTNumaNodeStats{}
|
||||
|
||||
if enabledMonFeatures.llcOccupancy {
|
||||
llcOccupancy, err := getIntelRdtParamUint(numaPath, "llc_occupancy")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.LLCOccupancy = llcOccupancy
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
681
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
Normal file
681
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
Normal file
@ -0,0 +1,681 @@
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT features:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
* two sub-features of RDT.
|
||||
*
|
||||
* Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
* cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
* with other 'subsets'. The different subsets are identified by class of
|
||||
* service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
*
|
||||
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
* over memory bandwidth for the software. A user controls the resource by
|
||||
* indicating the percentage of maximum memory bandwidth or memory bandwidth
|
||||
* limit in MBps unit if MBA Software Controller is enabled.
|
||||
*
|
||||
* More details about Intel RDT CAT and MBA can be found in the section 17.18
|
||||
* of Intel Software Developer Manual:
|
||||
* https://software.intel.com/en-us/articles/intel-sdm
|
||||
*
|
||||
* About Intel RDT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
* Comparing with cgroups, it has similar process management lifecycle and
|
||||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
* "resource control" filesystem.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | | |-- cbm_mask
|
||||
* | | |-- min_cbm_bits
|
||||
* | | |-- num_closids
|
||||
* | |-- L3_MON
|
||||
* | | |-- max_threshold_occupancy
|
||||
* | | |-- mon_features
|
||||
* | | |-- num_rmids
|
||||
* | |-- MB
|
||||
* | |-- bandwidth_gran
|
||||
* | |-- delay_linear
|
||||
* | |-- min_bandwidth
|
||||
* | |-- num_closids
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <clos>
|
||||
* |-- ...
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
* cache and memory bandwidth resources constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* <container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent.
|
||||
*
|
||||
* The file `schemata` has a list of all the resources available to this group.
|
||||
* Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
*
|
||||
* L3 cache schema:
|
||||
* It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
* contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
* e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
* bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||
* set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
*
|
||||
* Memory bandwidth schema:
|
||||
* It has allocation values for memory bandwidth on each socket, which contains
|
||||
* L3 cache id and memory bandwidth.
|
||||
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
*
|
||||
* The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
* can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
* that is allocated is also dependent on the CPU model and can be looked up at
|
||||
* "info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
* step available on the hardware.
|
||||
*
|
||||
* If MBA Software Controller is enabled through mount option "-o mba_MBps":
|
||||
* mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
|
||||
* We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
|
||||
* instead of "percentages". The kernel underneath would use a software feedback
|
||||
* mechanism or a "Software Controller" which reads the actual bandwidth using
|
||||
* MBM counters and adjust the memory bandwidth percentages to ensure:
|
||||
* "actual memory bandwidth < user specified memory bandwidth".
|
||||
*
|
||||
* For example, on a two-socket machine, the schema line could be
|
||||
* "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
|
||||
* and 7000 MBps memory bandwidth limit on socket 1.
|
||||
*
|
||||
* For more information about Intel RDT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
* with a memory bandwidth granularity of 10%.
|
||||
*
|
||||
* Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
* on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
* maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
* "memBwSchema": "MB:0=20;1=70"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
config *configs.Config
|
||||
id string
|
||||
path string
|
||||
}
|
||||
|
||||
// NewManager returns a new instance of Manager, or nil if the Intel RDT
|
||||
// functionality is not specified in the config, available from hardware or
|
||||
// enabled in the kernel.
|
||||
func NewManager(config *configs.Config, id string, path string) *Manager {
|
||||
if config.IntelRdt == nil {
|
||||
return nil
|
||||
}
|
||||
if _, err := Root(); err != nil {
|
||||
// Intel RDT is not available.
|
||||
return nil
|
||||
}
|
||||
return newManager(config, id, path)
|
||||
}
|
||||
|
||||
// newManager is the same as NewManager, except it does not check if the feature
|
||||
// is actually available. Used by unit tests that mock intelrdt paths.
|
||||
func newManager(config *configs.Config, id string, path string) *Manager {
|
||||
return &Manager{
|
||||
config: config,
|
||||
id: id,
|
||||
path: path,
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
intelRdtTasks = "tasks"
|
||||
)
|
||||
|
||||
var (
|
||||
// The flag to indicate if Intel RDT/CAT is enabled
|
||||
catEnabled bool
|
||||
// The flag to indicate if Intel RDT/MBA is enabled
|
||||
mbaEnabled bool
|
||||
|
||||
// For Intel RDT initialization
|
||||
initOnce sync.Once
|
||||
|
||||
errNotFound = errors.New("Intel RDT not available")
|
||||
)
|
||||
|
||||
// Check if Intel RDT sub-features are enabled in featuresInit()
|
||||
func featuresInit() {
|
||||
initOnce.Do(func() {
|
||||
// 1. Check if Intel RDT "resource control" filesystem is available.
|
||||
// The user guarantees to mount the filesystem.
|
||||
root, err := Root()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT sub-features are available in "resource
|
||||
// control" filesystem. Intel RDT sub-features can be
|
||||
// selectively disabled or enabled by kernel command line
|
||||
// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "L3")); err == nil {
|
||||
catEnabled = true
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "MB")); err == nil {
|
||||
mbaEnabled = true
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(root, "info", "L3_MON")); err != nil {
|
||||
return
|
||||
}
|
||||
enabledMonFeatures, err = getMonFeatures(root)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if enabledMonFeatures.mbmTotalBytes || enabledMonFeatures.mbmLocalBytes {
|
||||
mbmEnabled = true
|
||||
}
|
||||
if enabledMonFeatures.llcOccupancy {
|
||||
cmtEnabled = true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// findIntelRdtMountpointDir returns the mount point of the Intel RDT "resource control" filesystem.
|
||||
func findIntelRdtMountpointDir() (string, error) {
|
||||
mi, err := mountinfo.GetMounts(func(m *mountinfo.Info) (bool, bool) {
|
||||
// similar to mountinfo.FSTypeFilter but stops after the first match
|
||||
if m.FSType == "resctrl" {
|
||||
return false, true // don't skip, stop
|
||||
}
|
||||
return true, false // skip, keep going
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errNotFound
|
||||
}
|
||||
|
||||
return mi[0].Mountpoint, nil
|
||||
}
|
||||
|
||||
// For Root() use only.
|
||||
var (
|
||||
intelRdtRoot string
|
||||
intelRdtRootErr error
|
||||
rootOnce sync.Once
|
||||
)
|
||||
|
||||
// The kernel creates this (empty) directory if resctrl is supported by the
|
||||
// hardware and kernel. The user is responsible for mounting the resctrl
|
||||
// filesystem, and they could mount it somewhere else if they wanted to.
|
||||
const defaultResctrlMountpoint = "/sys/fs/resctrl"
|
||||
|
||||
// Root returns the Intel RDT "resource control" filesystem mount point.
|
||||
func Root() (string, error) {
|
||||
rootOnce.Do(func() {
|
||||
// Does this system support resctrl?
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Statfs(defaultResctrlMountpoint, &statfs); err != nil {
|
||||
if errors.Is(err, unix.ENOENT) {
|
||||
err = errNotFound
|
||||
}
|
||||
intelRdtRootErr = err
|
||||
return
|
||||
}
|
||||
|
||||
// Has the resctrl fs been mounted to the default mount point?
|
||||
if statfs.Type == unix.RDTGROUP_SUPER_MAGIC {
|
||||
intelRdtRoot = defaultResctrlMountpoint
|
||||
return
|
||||
}
|
||||
|
||||
// The resctrl fs could have been mounted somewhere nonstandard.
|
||||
intelRdtRoot, intelRdtRootErr = findIntelRdtMountpointDir()
|
||||
})
|
||||
|
||||
return intelRdtRoot, intelRdtRootErr
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified file.
|
||||
func getIntelRdtParamUint(path, file string) (uint64, error) {
|
||||
fileName := filepath.Join(path, file)
|
||||
contents, err := os.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
res, err := fscommon.ParseUint(string(bytes.TrimSpace(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Gets a string value from the specified file
|
||||
func getIntelRdtParamString(path, file string) (string, error) {
|
||||
contents, err := os.ReadFile(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(bytes.TrimSpace(contents)), nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil {
|
||||
return newLastCmdError(fmt.Errorf("intelrdt: unable to write %v: %w", data, err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the read-only L3 cache information
|
||||
func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||
l3CacheInfo := &L3CacheInfo{}
|
||||
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "L3")
|
||||
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
l3CacheInfo.CbmMask = cbmMask
|
||||
l3CacheInfo.MinCbmBits = minCbmBits
|
||||
l3CacheInfo.NumClosids = numClosids
|
||||
|
||||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// Get the read-only memory bandwidth information
|
||||
func getMemBwInfo() (*MemBwInfo, error) {
|
||||
memBwInfo := &MemBwInfo{}
|
||||
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "MB")
|
||||
bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
delayLinear, err := getIntelRdtParamUint(path, "delay_linear")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return memBwInfo, err
|
||||
}
|
||||
|
||||
memBwInfo.BandwidthGran = bandwidthGran
|
||||
memBwInfo.DelayLinear = delayLinear
|
||||
memBwInfo.MinBandwidth = minBandwidth
|
||||
memBwInfo.NumClosids = numClosids
|
||||
|
||||
return memBwInfo, nil
|
||||
}
|
||||
|
||||
// Get diagnostics for last filesystem operation error from file info/last_cmd_status
|
||||
func getLastCmdStatus() (string, error) {
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info")
|
||||
lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return lastCmdStatus, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", intelRdtTasks)
|
||||
}
|
||||
|
||||
// Don't attach any pid if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := os.WriteFile(filepath.Join(dir, intelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil {
|
||||
return newLastCmdError(fmt.Errorf("intelrdt: unable to add pid %d: %w", pid, err))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT/CAT is enabled
|
||||
func IsCATEnabled() bool {
|
||||
featuresInit()
|
||||
return catEnabled
|
||||
}
|
||||
|
||||
// Check if Intel RDT/MBA is enabled
|
||||
func IsMBAEnabled() bool {
|
||||
featuresInit()
|
||||
return mbaEnabled
|
||||
}
|
||||
|
||||
// Get the path of the clos group in "resource control" filesystem that the container belongs to
|
||||
func (m *Manager) getIntelRdtPath() (string, error) {
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
clos := m.id
|
||||
if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID != "" {
|
||||
clos = m.config.IntelRdt.ClosID
|
||||
}
|
||||
|
||||
return filepath.Join(rootPath, clos), nil
|
||||
}
|
||||
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
// If intelRdt is not specified in config, we do nothing
|
||||
if m.config.IntelRdt == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
path, err := m.getIntelRdtPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if m.config.IntelRdt.ClosID != "" && m.config.IntelRdt.L3CacheSchema == "" && m.config.IntelRdt.MemBwSchema == "" {
|
||||
// Check that the CLOS exists, i.e. it has been pre-configured to
|
||||
// conform with the runtime spec
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return fmt.Errorf("clos dir not accessible (must be pre-created when l3CacheSchema and memBwSchema are empty): %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return newLastCmdError(err)
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, pid); err != nil {
|
||||
return newLastCmdError(err)
|
||||
}
|
||||
|
||||
m.path = path
|
||||
return nil
|
||||
}
|
||||
|
||||
// Destroys the Intel RDT container-specific 'container_id' group
|
||||
func (m *Manager) Destroy() error {
|
||||
// Don't remove resctrl group if closid has been explicitly specified. The
|
||||
// group is likely externally managed, i.e. by some other entity than us.
|
||||
// There are probably other containers/tasks sharing the same group.
|
||||
if m.config.IntelRdt != nil && m.config.IntelRdt.ClosID == "" {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := os.RemoveAll(m.GetPath()); err != nil {
|
||||
return err
|
||||
}
|
||||
m.path = ""
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
// restore the object later
|
||||
func (m *Manager) GetPath() string {
|
||||
if m.path == "" {
|
||||
m.path, _ = m.getIntelRdtPath()
|
||||
}
|
||||
return m.path
|
||||
}
|
||||
|
||||
// Returns statistics for Intel RDT
|
||||
func (m *Manager) GetStats() (*Stats, error) {
|
||||
// If intelRdt is not specified in config
|
||||
if m.config.IntelRdt == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := newStats()
|
||||
|
||||
rootPath, err := Root()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// The read-only L3 cache and memory bandwidth schemata in root
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
|
||||
// The L3 cache and memory bandwidth schemata in container's clos group
|
||||
containerPath := m.GetPath()
|
||||
tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
|
||||
if IsCATEnabled() {
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "L3") {
|
||||
stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The L3 cache schema in container's clos group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "L3") {
|
||||
stats.L3CacheSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if IsMBAEnabled() {
|
||||
// The read-only memory bandwidth information
|
||||
memBwInfo, err := getMemBwInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MemBwInfo = memBwInfo
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
for _, schemaRoot := range schemaRootStrings {
|
||||
if strings.Contains(schemaRoot, "MB") {
|
||||
stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot)
|
||||
}
|
||||
}
|
||||
|
||||
// The memory bandwidth schema in container's clos group
|
||||
for _, schema := range schemaStrings {
|
||||
if strings.Contains(schema, "MB") {
|
||||
stats.MemBwSchema = strings.TrimSpace(schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if IsMBMEnabled() || IsCMTEnabled() {
|
||||
err = getMonitoringStats(containerPath, stats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
// About L3 cache schema:
|
||||
// It has allocation bitmasks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM
|
||||
// is 0xc0.
|
||||
//
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel CPU models. Kernel will check
|
||||
// if it is valid when writing. e.g., default value 0xfffff in root
|
||||
// indicates the max bits of CBM is 20 bits, which mapping to entire
|
||||
// L3 cache capacity. Some valid CBM values to set in a group:
|
||||
// 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
//
|
||||
//
|
||||
// About memory bandwidth schema:
|
||||
// It has allocation values for memory bandwidth on each socket, which
|
||||
// contains L3 cache id and memory bandwidth.
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// For example, on a two-socket machine, the schema line could be:
|
||||
// "MB:0=20;1=70"
|
||||
//
|
||||
// The minimum bandwidth percentage value for each CPU model is
|
||||
// predefined and can be looked up through "info/MB/min_bandwidth".
|
||||
// The bandwidth granularity that is allocated is also dependent on
|
||||
// the CPU model and can be looked up at "info/MB/bandwidth_gran".
|
||||
// The available bandwidth control steps are: min_bw + N * bw_gran.
|
||||
// Intermediate values are rounded to the next control step available
|
||||
// on the hardware.
|
||||
//
|
||||
// If MBA Software Controller is enabled through mount option
|
||||
// "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
|
||||
// We could specify memory bandwidth in "MBps" (Mega Bytes per second)
|
||||
// unit instead of "percentages". The kernel underneath would use a
|
||||
// software feedback mechanism or a "Software Controller" which reads
|
||||
// the actual bandwidth using MBM counters and adjust the memory
|
||||
// bandwidth percentages to ensure:
|
||||
// "actual memory bandwidth < user specified memory bandwidth".
|
||||
//
|
||||
// For example, on a two-socket machine, the schema line could be
|
||||
// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
|
||||
// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
|
||||
if container.IntelRdt != nil {
|
||||
path := m.GetPath()
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
memBwSchema := container.IntelRdt.MemBwSchema
|
||||
|
||||
// TODO: verify that l3CacheSchema and/or memBwSchema match the
|
||||
// existing schemata if ClosID has been specified. This is a more
|
||||
// involved than reading the file and doing plain string comparison as
|
||||
// the value written in does not necessarily match what gets read out
|
||||
// (leading zeros, cache id ordering etc).
|
||||
|
||||
// Write a single joint schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only L3 cache schema string to schemata file
|
||||
if l3CacheSchema != "" && memBwSchema == "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Write only memory bandwidth schema string to schemata file
|
||||
if l3CacheSchema == "" && memBwSchema != "" {
|
||||
if err := writeFile(path, "schemata", memBwSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newLastCmdError(err error) error {
|
||||
status, err1 := getLastCmdStatus()
|
||||
if err1 == nil {
|
||||
return fmt.Errorf("%w, last_cmd_status: %s", err, status)
|
||||
}
|
||||
return err
|
||||
}
|
31
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
Normal file
31
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
package intelrdt
|
||||
|
||||
// The flag to indicate if Intel RDT/MBM is enabled
|
||||
var mbmEnabled bool
|
||||
|
||||
// Check if Intel RDT/MBM is enabled.
|
||||
func IsMBMEnabled() bool {
|
||||
featuresInit()
|
||||
return mbmEnabled
|
||||
}
|
||||
|
||||
func getMBMNumaNodeStats(numaPath string) (*MBMNumaNodeStats, error) {
|
||||
stats := &MBMNumaNodeStats{}
|
||||
if enabledMonFeatures.mbmTotalBytes {
|
||||
mbmTotalBytes, err := getIntelRdtParamUint(numaPath, "mbm_total_bytes")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MBMTotalBytes = mbmTotalBytes
|
||||
}
|
||||
|
||||
if enabledMonFeatures.mbmLocalBytes {
|
||||
mbmLocalBytes, err := getIntelRdtParamUint(numaPath, "mbm_local_bytes")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MBMLocalBytes = mbmLocalBytes
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
83
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
Normal file
83
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var enabledMonFeatures monFeatures
|
||||
|
||||
type monFeatures struct {
|
||||
mbmTotalBytes bool
|
||||
mbmLocalBytes bool
|
||||
llcOccupancy bool
|
||||
}
|
||||
|
||||
func getMonFeatures(intelRdtRoot string) (monFeatures, error) {
|
||||
file, err := os.Open(filepath.Join(intelRdtRoot, "info", "L3_MON", "mon_features"))
|
||||
if err != nil {
|
||||
return monFeatures{}, err
|
||||
}
|
||||
defer file.Close()
|
||||
return parseMonFeatures(file)
|
||||
}
|
||||
|
||||
func parseMonFeatures(reader io.Reader) (monFeatures, error) {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
|
||||
monFeatures := monFeatures{}
|
||||
|
||||
for scanner.Scan() {
|
||||
switch feature := scanner.Text(); feature {
|
||||
case "mbm_total_bytes":
|
||||
monFeatures.mbmTotalBytes = true
|
||||
case "mbm_local_bytes":
|
||||
monFeatures.mbmLocalBytes = true
|
||||
case "llc_occupancy":
|
||||
monFeatures.llcOccupancy = true
|
||||
default:
|
||||
logrus.Warnf("Unsupported Intel RDT monitoring feature: %s", feature)
|
||||
}
|
||||
}
|
||||
|
||||
return monFeatures, scanner.Err()
|
||||
}
|
||||
|
||||
func getMonitoringStats(containerPath string, stats *Stats) error {
|
||||
numaFiles, err := os.ReadDir(filepath.Join(containerPath, "mon_data"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var mbmStats []MBMNumaNodeStats
|
||||
var cmtStats []CMTNumaNodeStats
|
||||
|
||||
for _, file := range numaFiles {
|
||||
if file.IsDir() {
|
||||
numaPath := filepath.Join(containerPath, "mon_data", file.Name())
|
||||
if IsMBMEnabled() {
|
||||
numaMBMStats, err := getMBMNumaNodeStats(numaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mbmStats = append(mbmStats, *numaMBMStats)
|
||||
}
|
||||
if IsCMTEnabled() {
|
||||
numaCMTStats, err := getCMTNumaNodeStats(numaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmtStats = append(cmtStats, *numaCMTStats)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stats.MBMStats = &mbmStats
|
||||
stats.CMTStats = &cmtStats
|
||||
|
||||
return err
|
||||
}
|
57
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
Normal file
57
e2e/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
package intelrdt
|
||||
|
||||
type L3CacheInfo struct {
|
||||
CbmMask string `json:"cbm_mask,omitempty"`
|
||||
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type MemBwInfo struct {
|
||||
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
|
||||
DelayLinear uint64 `json:"delay_linear,omitempty"`
|
||||
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type MBMNumaNodeStats struct {
|
||||
// The 'mbm_total_bytes' in 'container_id' group.
|
||||
MBMTotalBytes uint64 `json:"mbm_total_bytes"`
|
||||
|
||||
// The 'mbm_local_bytes' in 'container_id' group.
|
||||
MBMLocalBytes uint64 `json:"mbm_local_bytes"`
|
||||
}
|
||||
|
||||
type CMTNumaNodeStats struct {
|
||||
// The 'llc_occupancy' in 'container_id' group.
|
||||
LLCOccupancy uint64 `json:"llc_occupancy"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth information
|
||||
MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"`
|
||||
|
||||
// The read-only memory bandwidth schema in root
|
||||
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
|
||||
// The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group
|
||||
MBMStats *[]MBMNumaNodeStats `json:"mbm_stats,omitempty"`
|
||||
|
||||
// The cache monitoring technology statistics from NUMA nodes in 'container_id' group
|
||||
CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"`
|
||||
}
|
||||
|
||||
func newStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
119
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
119
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
package utils
|
||||
|
||||
/*
|
||||
* Copyright 2016, 2017 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// MaxNameLen is the maximum length of the name of a file descriptor being sent
|
||||
// using SendFile. The name of the file handle returned by RecvFile will never be
|
||||
// larger than this value.
|
||||
const MaxNameLen = 4096
|
||||
|
||||
// oobSpace is the size of the oob slice required to store a single FD. Note
|
||||
// that unix.UnixRights appears to make the assumption that fd is always int32,
|
||||
// so sizeof(fd) = 4.
|
||||
var oobSpace = unix.CmsgSpace(4)
|
||||
|
||||
// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
|
||||
// socket. The file name of the remote file descriptor will be recreated
|
||||
// locally (it is sent as non-auxiliary data in the same payload).
|
||||
func RecvFile(socket *os.File) (_ *os.File, Err error) {
|
||||
name := make([]byte, MaxNameLen)
|
||||
oob := make([]byte, oobSpace)
|
||||
|
||||
sockfd := socket.Fd()
|
||||
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if n >= MaxNameLen || oobn != oobSpace {
|
||||
return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||
}
|
||||
// Truncate.
|
||||
name = name[:n]
|
||||
oob = oob[:oobn]
|
||||
|
||||
scms, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We cannot control how many SCM_RIGHTS we receive, and upon receiving
|
||||
// them all of the descriptors are installed in our fd table, so we need to
|
||||
// parse all of the SCM_RIGHTS we received in order to close all of the
|
||||
// descriptors on error.
|
||||
var fds []int
|
||||
defer func() {
|
||||
for i, fd := range fds {
|
||||
if i == 0 && Err == nil {
|
||||
// Only close the first one on error.
|
||||
continue
|
||||
}
|
||||
// Always close extra ones.
|
||||
_ = unix.Close(fd)
|
||||
}
|
||||
}()
|
||||
var lastErr error
|
||||
for _, scm := range scms {
|
||||
if scm.Header.Type == unix.SCM_RIGHTS {
|
||||
scmFds, err := unix.ParseUnixRights(&scm)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
} else {
|
||||
fds = append(fds, scmFds...)
|
||||
}
|
||||
}
|
||||
}
|
||||
if lastErr != nil {
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
// We do this after collecting the fds to make sure we close them all when
|
||||
// returning an error here.
|
||||
if len(scms) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||
}
|
||||
if len(fds) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
||||
}
|
||||
return os.NewFile(uintptr(fds[0]), string(name)), nil
|
||||
}
|
||||
|
||||
// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
|
||||
// included so that if the other end uses RecvFile, the file will have the same
|
||||
// name information.
|
||||
func SendFile(socket *os.File, file *os.File) error {
|
||||
name := file.Name()
|
||||
if len(name) >= MaxNameLen {
|
||||
return fmt.Errorf("sendfd: filename too long: %s", name)
|
||||
}
|
||||
err := SendRawFd(socket, name, file.Fd())
|
||||
runtime.KeepAlive(file)
|
||||
return err
|
||||
}
|
||||
|
||||
// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
|
||||
func SendRawFd(socket *os.File, msg string, fd uintptr) error {
|
||||
oob := unix.UnixRights(int(fd))
|
||||
return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
|
||||
}
|
115
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
115
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
@ -0,0 +1,115 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// ExitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or exited cleanly
|
||||
func ExitStatus(status unix.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
|
||||
// WriteJSON writes the provided struct v to w using standard json marshaling
|
||||
// without a trailing newline. This is used instead of json.Encoder because
|
||||
// there might be a problem in json decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
func WriteJSON(w io.Writer, v interface{}) error {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// CleanPath makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using CleanPath.
|
||||
func CleanPath(path string) string {
|
||||
// Deal with empty strings nicely.
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
||||
// stripRoot returns the passed path, stripping the root path if it was
|
||||
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
// as absolute, and the returned path will also always be absolute. In
|
||||
// addition, the paths are cleaned before stripping the root.
|
||||
func stripRoot(root, path string) string {
|
||||
// Make the paths clean and absolute.
|
||||
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
switch {
|
||||
case path == root:
|
||||
path = "/"
|
||||
case root == "/":
|
||||
// do nothing
|
||||
case strings.HasPrefix(path, root+"/"):
|
||||
path = strings.TrimPrefix(path, root+"/")
|
||||
}
|
||||
return CleanPath("/" + path)
|
||||
}
|
||||
|
||||
// SearchLabels searches through a list of key=value pairs for a given key,
|
||||
// returning its value, and the binary flag telling whether the key exist.
|
||||
func SearchLabels(labels []string, key string) (string, bool) {
|
||||
key += "="
|
||||
for _, s := range labels {
|
||||
if strings.HasPrefix(s, key) {
|
||||
return s[len(key):], true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Annotations returns the bundle path and user defined annotations from the
|
||||
// libcontainer state. We need to remove the bundle because that is a label
|
||||
// added by libcontainer.
|
||||
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
||||
userAnnotations = make(map[string]string)
|
||||
for _, l := range labels {
|
||||
name, value, ok := strings.Cut(l, "=")
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if name == "bundle" {
|
||||
bundle = value
|
||||
} else {
|
||||
userAnnotations[name] = value
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
363
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
363
e2e/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
@ -0,0 +1,363 @@
|
||||
//go:build !windows
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
_ "unsafe" // for go:linkname
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// EnsureProcHandle returns whether or not the given file handle is on procfs.
|
||||
func EnsureProcHandle(fh *os.File) error {
|
||||
var buf unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
||||
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
|
||||
}
|
||||
if buf.Type != unix.PROC_SUPER_MAGIC {
|
||||
return fmt.Errorf("%s is not on procfs", fh.Name())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
haveCloseRangeCloexecBool bool
|
||||
haveCloseRangeCloexecOnce sync.Once
|
||||
)
|
||||
|
||||
func haveCloseRangeCloexec() bool {
|
||||
haveCloseRangeCloexecOnce.Do(func() {
|
||||
// Make sure we're not closing a random file descriptor.
|
||||
tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer unix.Close(tmpFd)
|
||||
|
||||
err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
|
||||
// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
|
||||
// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
|
||||
// other potential error would imply that even the most basic close
|
||||
// operation wouldn't work.
|
||||
haveCloseRangeCloexecBool = err == nil
|
||||
})
|
||||
return haveCloseRangeCloexecBool
|
||||
}
|
||||
|
||||
type fdFunc func(fd int)
|
||||
|
||||
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
|
||||
// the current process.
|
||||
func fdRangeFrom(minFd int, fn fdFunc) error {
|
||||
procSelfFd, closer := ProcThreadSelf("fd")
|
||||
defer closer()
|
||||
|
||||
fdDir, err := os.Open(procSelfFd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fdDir.Close()
|
||||
|
||||
if err := EnsureProcHandle(fdDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fdList, err := fdDir.Readdirnames(-1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fdStr := range fdList {
|
||||
fd, err := strconv.Atoi(fdStr)
|
||||
// Ignore non-numeric file names.
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// Ignore descriptors lower than our specified minimum.
|
||||
if fd < minFd {
|
||||
continue
|
||||
}
|
||||
// Ignore the file descriptor we used for readdir, as it will be closed
|
||||
// when we return.
|
||||
if uintptr(fd) == fdDir.Fd() {
|
||||
continue
|
||||
}
|
||||
// Run the closure.
|
||||
fn(fd)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
|
||||
// equal to minFd in the current process.
|
||||
func CloseExecFrom(minFd int) error {
|
||||
// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
|
||||
if haveCloseRangeCloexec() {
|
||||
err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
|
||||
return os.NewSyscallError("close_range", err)
|
||||
}
|
||||
// Otherwise, fall back to the standard loop.
|
||||
return fdRangeFrom(minFd, unix.CloseOnExec)
|
||||
}
|
||||
|
||||
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
|
||||
|
||||
// In order to make sure we do not close the internal epoll descriptors the Go
|
||||
// runtime uses, we need to ensure that we skip descriptors that match
|
||||
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
|
||||
// unfortunately there's no other way to be sure we're only keeping the file
|
||||
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
|
||||
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
|
||||
|
||||
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
|
||||
// current process, except for those critical to Go's runtime (such as the
|
||||
// netpoll management descriptors).
|
||||
//
|
||||
// NOTE: That this function is incredibly dangerous to use in most Go code, as
|
||||
// closing file descriptors from underneath *os.File handles can lead to very
|
||||
// bad behaviour (the closed file descriptor can be re-used and then any
|
||||
// *os.File operations would apply to the wrong file). This function is only
|
||||
// intended to be called from the last stage of runc init.
|
||||
func UnsafeCloseFrom(minFd int) error {
|
||||
// We cannot use close_range(2) even if it is available, because we must
|
||||
// not close some file descriptors.
|
||||
return fdRangeFrom(minFd, func(fd int) {
|
||||
if runtime_IsPollDescriptor(uintptr(fd)) {
|
||||
// These are the Go runtimes internal netpoll file descriptors.
|
||||
// These file descriptors are operated on deep in the Go scheduler,
|
||||
// and closing those files from underneath Go can result in panics.
|
||||
// There is no issue with keeping them because they are not
|
||||
// executable and are not useful to an attacker anyway. Also we
|
||||
// don't have any choice.
|
||||
return
|
||||
}
|
||||
// There's nothing we can do about errors from close(2), and the
|
||||
// only likely error to be seen is EBADF which indicates the fd was
|
||||
// already closed (in which case, we got what we wanted).
|
||||
_ = unix.Close(fd)
|
||||
})
|
||||
}
|
||||
|
||||
// NewSockPair returns a new SOCK_STREAM unix socket pair.
|
||||
func NewSockPair(name string) (parent, child *os.File, err error) {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
||||
}
|
||||
|
||||
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||
// fd, this path is verified to have been inside the root -- so operating on it
|
||||
// through the passed fdpath should be safe. Do not access this path through
|
||||
// the original path strings, and do not attempt to use the pathname outside of
|
||||
// the passed closure (the file handle will be freed once the closure returns).
|
||||
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Remove the root then forcefully resolve inside the root.
|
||||
unsafePath = stripRoot(root, unsafePath)
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
||||
}
|
||||
|
||||
procSelfFd, closer := ProcThreadSelf("fd/")
|
||||
defer closer()
|
||||
|
||||
// Open the target path.
|
||||
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open o_path procfd: %w", err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
|
||||
// Double-check the path is the one we expected.
|
||||
if realpath, err := os.Readlink(procfd); err != nil {
|
||||
return fmt.Errorf("procfd verification failed: %w", err)
|
||||
} else if realpath != path {
|
||||
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
}
|
||||
|
||||
return fn(procfd)
|
||||
}
|
||||
|
||||
type ProcThreadSelfCloser func()
|
||||
|
||||
var (
|
||||
haveProcThreadSelf bool
|
||||
haveProcThreadSelfOnce sync.Once
|
||||
)
|
||||
|
||||
// ProcThreadSelf returns a string that is equivalent to
|
||||
// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
|
||||
// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
|
||||
// meaning that the passed string needs to be trusted. The caller _must_ call
|
||||
// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread)
|
||||
// *only once* after it has finished using the returned path string.
|
||||
func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
||||
haveProcThreadSelfOnce.Do(func() {
|
||||
if _, err := os.Stat("/proc/thread-self/"); err == nil {
|
||||
haveProcThreadSelf = true
|
||||
} else {
|
||||
logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
|
||||
}
|
||||
})
|
||||
|
||||
// We need to lock our thread until the caller is done with the path string
|
||||
// because any non-atomic operation on the path (such as opening a file,
|
||||
// then reading it) could be interrupted by the Go runtime where the
|
||||
// underlying thread is swapped out and the original thread is killed,
|
||||
// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
|
||||
// addition, the pre-3.17 fallback makes everything non-atomic because the
|
||||
// same thing could happen between unix.Gettid() and the path operations.
|
||||
//
|
||||
// In theory, we don't need to lock in the atomic user case when using
|
||||
// /proc/thread-self/, but it's better to be safe than sorry (and there are
|
||||
// only one or two truly atomic users of /proc/thread-self/).
|
||||
runtime.LockOSThread()
|
||||
|
||||
threadSelf := "/proc/thread-self/"
|
||||
if !haveProcThreadSelf {
|
||||
// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
|
||||
threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
|
||||
if _, err := os.Stat(threadSelf); err != nil {
|
||||
// Unfortunately, this code is called from rootfs_linux.go where we
|
||||
// are running inside the pid namespace of the container but /proc
|
||||
// is the host's procfs. Unfortunately there is no real way to get
|
||||
// the correct tid to use here (the kernel age means we cannot do
|
||||
// things like set up a private fsopen("proc") -- even scanning
|
||||
// NSpid in all of the tasks in /proc/self/task/*/status requires
|
||||
// Linux 4.1).
|
||||
//
|
||||
// So, we just have to assume that /proc/self is acceptable in this
|
||||
// one specific case.
|
||||
if os.Getpid() == 1 {
|
||||
logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
|
||||
} else {
|
||||
// This should never happen, but the fallback should work in most cases...
|
||||
logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
|
||||
}
|
||||
threadSelf = "/proc/self/"
|
||||
}
|
||||
}
|
||||
return threadSelf + subpath, runtime.UnlockOSThread
|
||||
}
|
||||
|
||||
// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to
|
||||
// create a /proc/thread-self handle for given file descriptor.
|
||||
//
|
||||
// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
|
||||
// without using fmt.Sprintf to avoid unneeded overhead.
|
||||
func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
||||
return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
||||
}
|
||||
|
||||
// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
|
||||
// but properly handling the case where path or root are "/".
|
||||
//
|
||||
// NOTE: The return value only make sense if the path doesn't contain "..".
|
||||
func IsLexicallyInRoot(root, path string) bool {
|
||||
if root != "/" {
|
||||
root += "/"
|
||||
}
|
||||
if path != "/" {
|
||||
path += "/"
|
||||
}
|
||||
return strings.HasPrefix(path, root)
|
||||
}
|
||||
|
||||
// MkdirAllInRootOpen attempts to make
|
||||
//
|
||||
// path, _ := securejoin.SecureJoin(root, unsafePath)
|
||||
// os.MkdirAll(path, mode)
|
||||
// os.Open(path)
|
||||
//
|
||||
// safer against attacks where components in the path are changed between
|
||||
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
|
||||
// try to detect any symlink components in the path while we are doing the
|
||||
// MkdirAll.
|
||||
//
|
||||
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
|
||||
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
|
||||
//
|
||||
// NOTE: If unsafePath is a subpath of root, we assume that you have already
|
||||
// called SecureJoin and so we use the provided path verbatim without resolving
|
||||
// any symlinks (this is done in a way that avoids symlink-exchange races).
|
||||
// This means that the path also must not contain ".." elements, otherwise an
|
||||
// error will occur.
|
||||
//
|
||||
// This uses securejoin.MkdirAllHandle under the hood, but it has special
|
||||
// handling if unsafePath has already been scoped within the rootfs (this is
|
||||
// needed for a lot of runc callers and fixing this would require reworking a
|
||||
// lot of path logic).
|
||||
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
|
||||
// If the path is already "within" the root, get the path relative to the
|
||||
// root and use that as the unsafe path. This is necessary because a lot of
|
||||
// MkdirAllInRootOpen callers have already done SecureJoin, and refactoring
|
||||
// all of them to stop using these SecureJoin'd paths would require a fair
|
||||
// amount of work.
|
||||
// TODO(cyphar): Do the refactor to libpathrs once it's ready.
|
||||
if IsLexicallyInRoot(root, unsafePath) {
|
||||
subPath, err := filepath.Rel(root, unsafePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
unsafePath = subPath
|
||||
}
|
||||
|
||||
// Check for any silly mode bits.
|
||||
if mode&^0o7777 != 0 {
|
||||
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
||||
}
|
||||
// Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if
|
||||
// passed. While it would make sense to return an error in that case (since
|
||||
// the user has asked for a mode that won't be applied), for compatibility
|
||||
// reasons we have to ignore these bits.
|
||||
if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
|
||||
logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
|
||||
mode &= 0o1777
|
||||
}
|
||||
|
||||
rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open root handle: %w", err)
|
||||
}
|
||||
defer rootDir.Close()
|
||||
|
||||
return securejoin.MkdirAllHandle(rootDir, unsafePath, int(mode))
|
||||
}
|
||||
|
||||
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
|
||||
// returned handle, for callers that don't need to use it.
|
||||
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
|
||||
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
|
||||
if err == nil {
|
||||
_ = f.Close()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Openat is a Go-friendly openat(2) wrapper.
|
||||
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
||||
dirFd := unix.AT_FDCWD
|
||||
if dir != nil {
|
||||
dirFd = int(dir.Fd())
|
||||
}
|
||||
flags |= unix.O_CLOEXEC
|
||||
|
||||
fd, err := unix.Openat(dirFd, path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, &os.PathError{Op: "openat", Path: path, Err: err}
|
||||
}
|
||||
return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil
|
||||
}
|
191
e2e/vendor/github.com/opencontainers/runtime-spec/LICENSE
generated
vendored
Normal file
191
e2e/vendor/github.com/opencontainers/runtime-spec/LICENSE
generated
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2015 The Linux Foundation.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
889
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
generated
vendored
Normal file
889
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
generated
vendored
Normal file
@ -0,0 +1,889 @@
|
||||
package specs
|
||||
|
||||
import "os"
|
||||
|
||||
// Spec is the base configuration for the container.
|
||||
type Spec struct {
|
||||
// Version of the Open Container Initiative Runtime Specification with which the bundle complies.
|
||||
Version string `json:"ociVersion"`
|
||||
// Process configures the container process.
|
||||
Process *Process `json:"process,omitempty"`
|
||||
// Root configures the container's root filesystem.
|
||||
Root *Root `json:"root,omitempty"`
|
||||
// Hostname configures the container's hostname.
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
// Domainname configures the container's domainname.
|
||||
Domainname string `json:"domainname,omitempty"`
|
||||
// Mounts configures additional mounts (on top of Root).
|
||||
Mounts []Mount `json:"mounts,omitempty"`
|
||||
// Hooks configures callbacks for container lifecycle events.
|
||||
Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"`
|
||||
// Annotations contains arbitrary metadata for the container.
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
|
||||
// Linux is platform-specific configuration for Linux based containers.
|
||||
Linux *Linux `json:"linux,omitempty" platform:"linux"`
|
||||
// Solaris is platform-specific configuration for Solaris based containers.
|
||||
Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"`
|
||||
// Windows is platform-specific configuration for Windows based containers.
|
||||
Windows *Windows `json:"windows,omitempty" platform:"windows"`
|
||||
// VM specifies configuration for virtual-machine-based containers.
|
||||
VM *VM `json:"vm,omitempty" platform:"vm"`
|
||||
// ZOS is platform-specific configuration for z/OS based containers.
|
||||
ZOS *ZOS `json:"zos,omitempty" platform:"zos"`
|
||||
}
|
||||
|
||||
// Scheduler represents the scheduling attributes for a process. It is based on
|
||||
// the Linux sched_setattr(2) syscall.
|
||||
type Scheduler struct {
|
||||
// Policy represents the scheduling policy (e.g., SCHED_FIFO, SCHED_RR, SCHED_OTHER).
|
||||
Policy LinuxSchedulerPolicy `json:"policy"`
|
||||
|
||||
// Nice is the nice value for the process, which affects its priority.
|
||||
Nice int32 `json:"nice,omitempty"`
|
||||
|
||||
// Priority represents the static priority of the process.
|
||||
Priority int32 `json:"priority,omitempty"`
|
||||
|
||||
// Flags is an array of scheduling flags.
|
||||
Flags []LinuxSchedulerFlag `json:"flags,omitempty"`
|
||||
|
||||
// The following ones are used by the DEADLINE scheduler.
|
||||
|
||||
// Runtime is the amount of time in nanoseconds during which the process
|
||||
// is allowed to run in a given period.
|
||||
Runtime uint64 `json:"runtime,omitempty"`
|
||||
|
||||
// Deadline is the absolute deadline for the process to complete its execution.
|
||||
Deadline uint64 `json:"deadline,omitempty"`
|
||||
|
||||
// Period is the length of the period in nanoseconds used for determining the process runtime.
|
||||
Period uint64 `json:"period,omitempty"`
|
||||
}
|
||||
|
||||
// Process contains information to start a specific application inside the container.
|
||||
type Process struct {
|
||||
// Terminal creates an interactive terminal for the container.
|
||||
Terminal bool `json:"terminal,omitempty"`
|
||||
// ConsoleSize specifies the size of the console.
|
||||
ConsoleSize *Box `json:"consoleSize,omitempty"`
|
||||
// User specifies user information for the process.
|
||||
User User `json:"user"`
|
||||
// Args specifies the binary and arguments for the application to execute.
|
||||
Args []string `json:"args,omitempty"`
|
||||
// CommandLine specifies the full command line for the application to execute on Windows.
|
||||
CommandLine string `json:"commandLine,omitempty" platform:"windows"`
|
||||
// Env populates the process environment for the process.
|
||||
Env []string `json:"env,omitempty"`
|
||||
// Cwd is the current working directory for the process and must be
|
||||
// relative to the container's root.
|
||||
Cwd string `json:"cwd"`
|
||||
// Capabilities are Linux capabilities that are kept for the process.
|
||||
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
|
||||
// Rlimits specifies rlimit options to apply to the process.
|
||||
Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"`
|
||||
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
|
||||
NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"`
|
||||
// ApparmorProfile specifies the apparmor profile for the container.
|
||||
ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
|
||||
// Specify an oom_score_adj for the container.
|
||||
OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"`
|
||||
// Scheduler specifies the scheduling attributes for a process
|
||||
Scheduler *Scheduler `json:"scheduler,omitempty" platform:"linux"`
|
||||
// SelinuxLabel specifies the selinux context that the container process is run as.
|
||||
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
|
||||
// IOPriority contains the I/O priority settings for the cgroup.
|
||||
IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// LinuxCapabilities specifies the list of allowed capabilities that are kept for a process.
|
||||
// http://man7.org/linux/man-pages/man7/capabilities.7.html
|
||||
type LinuxCapabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string `json:"bounding,omitempty" platform:"linux"`
|
||||
// Effective is the set of capabilities checked by the kernel.
|
||||
Effective []string `json:"effective,omitempty" platform:"linux"`
|
||||
// Inheritable is the capabilities preserved across execve.
|
||||
Inheritable []string `json:"inheritable,omitempty" platform:"linux"`
|
||||
// Permitted is the limiting superset for effective capabilities.
|
||||
Permitted []string `json:"permitted,omitempty" platform:"linux"`
|
||||
// Ambient is the ambient set of capabilities that are kept.
|
||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// IOPriority represents I/O priority settings for the container's processes within the process group.
|
||||
type LinuxIOPriority struct {
|
||||
Class IOPriorityClass `json:"class"`
|
||||
Priority int `json:"priority"`
|
||||
}
|
||||
|
||||
// IOPriorityClass represents an I/O scheduling class.
|
||||
type IOPriorityClass string
|
||||
|
||||
// Possible values for IOPriorityClass.
|
||||
const (
|
||||
IOPRIO_CLASS_RT IOPriorityClass = "IOPRIO_CLASS_RT"
|
||||
IOPRIO_CLASS_BE IOPriorityClass = "IOPRIO_CLASS_BE"
|
||||
IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE"
|
||||
)
|
||||
|
||||
// Box specifies dimensions of a rectangle. Used for specifying the size of a console.
|
||||
type Box struct {
|
||||
// Height is the vertical dimension of a box.
|
||||
Height uint `json:"height"`
|
||||
// Width is the horizontal dimension of a box.
|
||||
Width uint `json:"width"`
|
||||
}
|
||||
|
||||
// User specifies specific user (and group) information for the container process.
|
||||
type User struct {
|
||||
// UID is the user id.
|
||||
UID uint32 `json:"uid" platform:"linux,solaris,zos"`
|
||||
// GID is the group id.
|
||||
GID uint32 `json:"gid" platform:"linux,solaris,zos"`
|
||||
// Umask is the umask for the init process.
|
||||
Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"`
|
||||
// AdditionalGids are additional group ids set for the container's process.
|
||||
AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"`
|
||||
// Username is the user name.
|
||||
Username string `json:"username,omitempty" platform:"windows"`
|
||||
}
|
||||
|
||||
// Root contains information about the container's root filesystem on the host.
|
||||
type Root struct {
|
||||
// Path is the absolute path to the container's root filesystem.
|
||||
Path string `json:"path"`
|
||||
// Readonly makes the root filesystem for the container readonly before the process is executed.
|
||||
Readonly bool `json:"readonly,omitempty"`
|
||||
}
|
||||
|
||||
// Mount specifies a mount for a container.
|
||||
type Mount struct {
|
||||
// Destination is the absolute path where the mount will be placed in the container.
|
||||
Destination string `json:"destination"`
|
||||
// Type specifies the mount kind.
|
||||
Type string `json:"type,omitempty" platform:"linux,solaris,zos"`
|
||||
// Source specifies the source path of the mount.
|
||||
Source string `json:"source,omitempty"`
|
||||
// Options are fstab style mount options.
|
||||
Options []string `json:"options,omitempty"`
|
||||
|
||||
// UID/GID mappings used for changing file owners w/o calling chown, fs should support it.
|
||||
// Every mount point could have its own mapping.
|
||||
UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty" platform:"linux"`
|
||||
GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// Hook specifies a command that is run at a particular event in the lifecycle of a container
|
||||
type Hook struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args,omitempty"`
|
||||
Env []string `json:"env,omitempty"`
|
||||
Timeout *int `json:"timeout,omitempty"`
|
||||
}
|
||||
|
||||
// Hooks specifies a command that is run in the container at a particular event in the lifecycle of a container
|
||||
// Hooks for container setup and teardown
|
||||
type Hooks struct {
|
||||
// Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed.
|
||||
// It is called in the Runtime Namespace
|
||||
//
|
||||
// Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and
|
||||
// [Hooks.StartContainer] instead, which allow more granular hook control
|
||||
// during the create and start phase.
|
||||
Prestart []Hook `json:"prestart,omitempty"`
|
||||
// CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called
|
||||
// It is called in the Runtime Namespace
|
||||
CreateRuntime []Hook `json:"createRuntime,omitempty"`
|
||||
// CreateContainer is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called
|
||||
// It is called in the Container Namespace
|
||||
CreateContainer []Hook `json:"createContainer,omitempty"`
|
||||
// StartContainer is a list of hooks to be run after the start operation is called but before the container process is started
|
||||
// It is called in the Container Namespace
|
||||
StartContainer []Hook `json:"startContainer,omitempty"`
|
||||
// Poststart is a list of hooks to be run after the container process is started.
|
||||
// It is called in the Runtime Namespace
|
||||
Poststart []Hook `json:"poststart,omitempty"`
|
||||
// Poststop is a list of hooks to be run after the container process exits.
|
||||
// It is called in the Runtime Namespace
|
||||
Poststop []Hook `json:"poststop,omitempty"`
|
||||
}
|
||||
|
||||
// Linux contains platform-specific configuration for Linux based containers.
|
||||
type Linux struct {
|
||||
// UIDMapping specifies user mappings for supporting user namespaces.
|
||||
UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty"`
|
||||
// GIDMapping specifies group mappings for supporting user namespaces.
|
||||
GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty"`
|
||||
// Sysctl are a set of key value pairs that are set for the container on start
|
||||
Sysctl map[string]string `json:"sysctl,omitempty"`
|
||||
// Resources contain cgroup information for handling resource constraints
|
||||
// for the container
|
||||
Resources *LinuxResources `json:"resources,omitempty"`
|
||||
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is expected to be relative to the cgroups mountpoint.
|
||||
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
|
||||
CgroupsPath string `json:"cgroupsPath,omitempty"`
|
||||
// Namespaces contains the namespaces that are created and/or joined by the container
|
||||
Namespaces []LinuxNamespace `json:"namespaces,omitempty"`
|
||||
// Devices are a list of device nodes that are created for the container
|
||||
Devices []LinuxDevice `json:"devices,omitempty"`
|
||||
// Seccomp specifies the seccomp security settings for the container.
|
||||
Seccomp *LinuxSeccomp `json:"seccomp,omitempty"`
|
||||
// RootfsPropagation is the rootfs mount propagation mode for the container.
|
||||
RootfsPropagation string `json:"rootfsPropagation,omitempty"`
|
||||
// MaskedPaths masks over the provided paths inside the container.
|
||||
MaskedPaths []string `json:"maskedPaths,omitempty"`
|
||||
// ReadonlyPaths sets the provided paths as RO inside the container.
|
||||
ReadonlyPaths []string `json:"readonlyPaths,omitempty"`
|
||||
// MountLabel specifies the selinux context for the mounts in the container.
|
||||
MountLabel string `json:"mountLabel,omitempty"`
|
||||
// IntelRdt contains Intel Resource Director Technology (RDT) information for
|
||||
// handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container
|
||||
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
|
||||
// Personality contains configuration for the Linux personality syscall
|
||||
Personality *LinuxPersonality `json:"personality,omitempty"`
|
||||
// TimeOffsets specifies the offset for supporting time namespaces.
|
||||
TimeOffsets map[string]LinuxTimeOffset `json:"timeOffsets,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxNamespace is the configuration for a Linux namespace
|
||||
type LinuxNamespace struct {
|
||||
// Type is the type of namespace
|
||||
Type LinuxNamespaceType `json:"type"`
|
||||
// Path is a path to an existing namespace persisted on disk that can be joined
|
||||
// and is of the same type
|
||||
Path string `json:"path,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxNamespaceType is one of the Linux namespaces
|
||||
type LinuxNamespaceType string
|
||||
|
||||
const (
|
||||
// PIDNamespace for isolating process IDs
|
||||
PIDNamespace LinuxNamespaceType = "pid"
|
||||
// NetworkNamespace for isolating network devices, stacks, ports, etc
|
||||
NetworkNamespace LinuxNamespaceType = "network"
|
||||
// MountNamespace for isolating mount points
|
||||
MountNamespace LinuxNamespaceType = "mount"
|
||||
// IPCNamespace for isolating System V IPC, POSIX message queues
|
||||
IPCNamespace LinuxNamespaceType = "ipc"
|
||||
// UTSNamespace for isolating hostname and NIS domain name
|
||||
UTSNamespace LinuxNamespaceType = "uts"
|
||||
// UserNamespace for isolating user and group IDs
|
||||
UserNamespace LinuxNamespaceType = "user"
|
||||
// CgroupNamespace for isolating cgroup hierarchies
|
||||
CgroupNamespace LinuxNamespaceType = "cgroup"
|
||||
// TimeNamespace for isolating the clocks
|
||||
TimeNamespace LinuxNamespaceType = "time"
|
||||
)
|
||||
|
||||
// LinuxIDMapping specifies UID/GID mappings
|
||||
type LinuxIDMapping struct {
|
||||
// ContainerID is the starting UID/GID in the container
|
||||
ContainerID uint32 `json:"containerID"`
|
||||
// HostID is the starting UID/GID on the host to be mapped to 'ContainerID'
|
||||
HostID uint32 `json:"hostID"`
|
||||
// Size is the number of IDs to be mapped
|
||||
Size uint32 `json:"size"`
|
||||
}
|
||||
|
||||
// LinuxTimeOffset specifies the offset for Time Namespace
|
||||
type LinuxTimeOffset struct {
|
||||
// Secs is the offset of clock (in secs) in the container
|
||||
Secs int64 `json:"secs,omitempty"`
|
||||
// Nanosecs is the additional offset for Secs (in nanosecs)
|
||||
Nanosecs uint32 `json:"nanosecs,omitempty"`
|
||||
}
|
||||
|
||||
// POSIXRlimit type and restrictions
|
||||
type POSIXRlimit struct {
|
||||
// Type of the rlimit to set
|
||||
Type string `json:"type"`
|
||||
// Hard is the hard limit for the specified type
|
||||
Hard uint64 `json:"hard"`
|
||||
// Soft is the soft limit for the specified type
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// LinuxHugepageLimit structure corresponds to limiting kernel hugepages.
|
||||
// Default to reservation limits if supported. Otherwise fallback to page fault limits.
|
||||
type LinuxHugepageLimit struct {
|
||||
// Pagesize is the hugepage size.
|
||||
// Format: "<size><unit-prefix>B' (e.g. 64KB, 2MB, 1GB, etc.).
|
||||
Pagesize string `json:"pageSize"`
|
||||
// Limit is the limit of "hugepagesize" hugetlb reservations (if supported) or usage.
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
// LinuxInterfacePriority for network interfaces
|
||||
type LinuxInterfacePriority struct {
|
||||
// Name is the name of the network interface
|
||||
Name string `json:"name"`
|
||||
// Priority for the interface
|
||||
Priority uint32 `json:"priority"`
|
||||
}
|
||||
|
||||
// LinuxBlockIODevice holds major:minor format supported in blkio cgroup
|
||||
type LinuxBlockIODevice struct {
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice
|
||||
type LinuxWeightDevice struct {
|
||||
LinuxBlockIODevice
|
||||
// Weight is the bandwidth rate for the device.
|
||||
Weight *uint16 `json:"weight,omitempty"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"leafWeight,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type LinuxThrottleDevice struct {
|
||||
LinuxBlockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate uint64 `json:"rate"`
|
||||
}
|
||||
|
||||
// LinuxBlockIO for Linux cgroup 'blkio' resource management
|
||||
type LinuxBlockIO struct {
|
||||
// Specifies per cgroup weight
|
||||
Weight *uint16 `json:"weight,omitempty"`
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, CFQ scheduler only
|
||||
LeafWeight *uint16 `json:"leafWeight,omitempty"`
|
||||
// Weight per cgroup per device, can override BlkioWeight
|
||||
WeightDevice []LinuxWeightDevice `json:"weightDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, bytes per second
|
||||
ThrottleReadBpsDevice []LinuxThrottleDevice `json:"throttleReadBpsDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, bytes per second
|
||||
ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"throttleWriteBpsDevice,omitempty"`
|
||||
// IO read rate limit per cgroup per device, IO per second
|
||||
ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"throttleReadIOPSDevice,omitempty"`
|
||||
// IO write rate limit per cgroup per device, IO per second
|
||||
ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"throttleWriteIOPSDevice,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxMemory for Linux cgroup 'memory' resource management
|
||||
type LinuxMemory struct {
|
||||
// Memory limit (in bytes).
|
||||
Limit *int64 `json:"limit,omitempty"`
|
||||
// Memory reservation or soft_limit (in bytes).
|
||||
Reservation *int64 `json:"reservation,omitempty"`
|
||||
// Total memory limit (memory + swap).
|
||||
Swap *int64 `json:"swap,omitempty"`
|
||||
// Kernel memory limit (in bytes).
|
||||
//
|
||||
// Deprecated: kernel-memory limits are not supported in cgroups v2, and
|
||||
// were obsoleted in [kernel v5.4]. This field should no longer be used,
|
||||
// as it may be ignored by runtimes.
|
||||
//
|
||||
// [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0
|
||||
Kernel *int64 `json:"kernel,omitempty"`
|
||||
// Kernel memory limit for tcp (in bytes)
|
||||
KernelTCP *int64 `json:"kernelTCP,omitempty"`
|
||||
// How aggressive the kernel will swap memory pages.
|
||||
Swappiness *uint64 `json:"swappiness,omitempty"`
|
||||
// DisableOOMKiller disables the OOM killer for out of memory conditions
|
||||
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
|
||||
// Enables hierarchical memory accounting
|
||||
UseHierarchy *bool `json:"useHierarchy,omitempty"`
|
||||
// CheckBeforeUpdate enables checking if a new memory limit is lower
|
||||
// than the current usage during update, and if so, rejecting the new
|
||||
// limit.
|
||||
CheckBeforeUpdate *bool `json:"checkBeforeUpdate,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxCPU for Linux cgroup 'cpu' resource management
|
||||
type LinuxCPU struct {
|
||||
// CPU shares (relative weight (ratio) vs. other cgroups with cpu shares).
|
||||
Shares *uint64 `json:"shares,omitempty"`
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
Quota *int64 `json:"quota,omitempty"`
|
||||
// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a
|
||||
// given period.
|
||||
Burst *uint64 `json:"burst,omitempty"`
|
||||
// CPU period to be used for hardcapping (in usecs).
|
||||
Period *uint64 `json:"period,omitempty"`
|
||||
// How much time realtime scheduling may use (in usecs).
|
||||
RealtimeRuntime *int64 `json:"realtimeRuntime,omitempty"`
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"`
|
||||
// CPUs to use within the cpuset. Default is to use any CPU available.
|
||||
Cpus string `json:"cpus,omitempty"`
|
||||
// List of memory nodes in the cpuset. Default is to use any available memory node.
|
||||
Mems string `json:"mems,omitempty"`
|
||||
// cgroups are configured with minimum weight, 0: default behavior, 1: SCHED_IDLE.
|
||||
Idle *int64 `json:"idle,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3)
|
||||
type LinuxPids struct {
|
||||
// Maximum number of PIDs. Default is "no limit".
|
||||
Limit int64 `json:"limit"`
|
||||
}
|
||||
|
||||
// LinuxNetwork identification and priority configuration
|
||||
type LinuxNetwork struct {
|
||||
// Set class identifier for container's network packets
|
||||
ClassID *uint32 `json:"classID,omitempty"`
|
||||
// Set priority of network traffic for container
|
||||
Priorities []LinuxInterfacePriority `json:"priorities,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
|
||||
type LinuxRdma struct {
|
||||
// Maximum number of HCA handles that can be opened. Default is "no limit".
|
||||
HcaHandles *uint32 `json:"hcaHandles,omitempty"`
|
||||
// Maximum number of HCA objects that can be created. Default is "no limit".
|
||||
HcaObjects *uint32 `json:"hcaObjects,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxResources has container runtime resource constraints
|
||||
type LinuxResources struct {
|
||||
// Devices configures the device allowlist.
|
||||
Devices []LinuxDeviceCgroup `json:"devices,omitempty"`
|
||||
// Memory restriction configuration
|
||||
Memory *LinuxMemory `json:"memory,omitempty"`
|
||||
// CPU resource restriction configuration
|
||||
CPU *LinuxCPU `json:"cpu,omitempty"`
|
||||
// Task resource restriction configuration.
|
||||
Pids *LinuxPids `json:"pids,omitempty"`
|
||||
// BlockIO restriction configuration
|
||||
BlockIO *LinuxBlockIO `json:"blockIO,omitempty"`
|
||||
// Hugetlb limits (in bytes). Default to reservation limits if supported.
|
||||
HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"`
|
||||
// Network restriction configuration
|
||||
Network *LinuxNetwork `json:"network,omitempty"`
|
||||
// Rdma resource restriction configuration.
|
||||
// Limits are a set of key value pairs that define RDMA resource limits,
|
||||
// where the key is device name and value is resource limits.
|
||||
Rdma map[string]LinuxRdma `json:"rdma,omitempty"`
|
||||
// Unified resources.
|
||||
Unified map[string]string `json:"unified,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxDevice represents the mknod information for a Linux special device file
|
||||
type LinuxDevice struct {
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
// Device type, block, char, etc.
|
||||
Type string `json:"type"`
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
// FileMode permission bits for the device.
|
||||
FileMode *os.FileMode `json:"fileMode,omitempty"`
|
||||
// UID of the device.
|
||||
UID *uint32 `json:"uid,omitempty"`
|
||||
// Gid of the device.
|
||||
GID *uint32 `json:"gid,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxDeviceCgroup represents a device rule for the devices specified to
|
||||
// the device controller
|
||||
type LinuxDeviceCgroup struct {
|
||||
// Allow or deny
|
||||
Allow bool `json:"allow"`
|
||||
// Device type, block, char, etc.
|
||||
Type string `json:"type,omitempty"`
|
||||
// Major is the device's major number.
|
||||
Major *int64 `json:"major,omitempty"`
|
||||
// Minor is the device's minor number.
|
||||
Minor *int64 `json:"minor,omitempty"`
|
||||
// Cgroup access permissions format, rwm.
|
||||
Access string `json:"access,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxPersonalityDomain refers to a personality domain.
|
||||
type LinuxPersonalityDomain string
|
||||
|
||||
// LinuxPersonalityFlag refers to an additional personality flag. None are currently defined.
|
||||
type LinuxPersonalityFlag string
|
||||
|
||||
// Define domain and flags for Personality
|
||||
const (
|
||||
// PerLinux is the standard Linux personality
|
||||
PerLinux LinuxPersonalityDomain = "LINUX"
|
||||
// PerLinux32 sets personality to 32 bit
|
||||
PerLinux32 LinuxPersonalityDomain = "LINUX32"
|
||||
)
|
||||
|
||||
// LinuxPersonality represents the Linux personality syscall input
|
||||
type LinuxPersonality struct {
|
||||
// Domain for the personality
|
||||
Domain LinuxPersonalityDomain `json:"domain"`
|
||||
// Additional flags
|
||||
Flags []LinuxPersonalityFlag `json:"flags,omitempty"`
|
||||
}
|
||||
|
||||
// Solaris contains platform-specific configuration for Solaris application containers.
|
||||
type Solaris struct {
|
||||
// SMF FMRI which should go "online" before we start the container process.
|
||||
Milestone string `json:"milestone,omitempty"`
|
||||
// Maximum set of privileges any process in this container can obtain.
|
||||
LimitPriv string `json:"limitpriv,omitempty"`
|
||||
// The maximum amount of shared memory allowed for this container.
|
||||
MaxShmMemory string `json:"maxShmMemory,omitempty"`
|
||||
// Specification for automatic creation of network resources for this container.
|
||||
Anet []SolarisAnet `json:"anet,omitempty"`
|
||||
// Set limit on the amount of CPU time that can be used by container.
|
||||
CappedCPU *SolarisCappedCPU `json:"cappedCPU,omitempty"`
|
||||
// The physical and swap caps on the memory that can be used by this container.
|
||||
CappedMemory *SolarisCappedMemory `json:"cappedMemory,omitempty"`
|
||||
}
|
||||
|
||||
// SolarisCappedCPU allows users to set limit on the amount of CPU time that can be used by container.
|
||||
type SolarisCappedCPU struct {
|
||||
Ncpus string `json:"ncpus,omitempty"`
|
||||
}
|
||||
|
||||
// SolarisCappedMemory allows users to set the physical and swap caps on the memory that can be used by this container.
|
||||
type SolarisCappedMemory struct {
|
||||
Physical string `json:"physical,omitempty"`
|
||||
Swap string `json:"swap,omitempty"`
|
||||
}
|
||||
|
||||
// SolarisAnet provides the specification for automatic creation of network resources for this container.
|
||||
type SolarisAnet struct {
|
||||
// Specify a name for the automatically created VNIC datalink.
|
||||
Linkname string `json:"linkname,omitempty"`
|
||||
// Specify the link over which the VNIC will be created.
|
||||
Lowerlink string `json:"lowerLink,omitempty"`
|
||||
// The set of IP addresses that the container can use.
|
||||
Allowedaddr string `json:"allowedAddress,omitempty"`
|
||||
// Specifies whether allowedAddress limitation is to be applied to the VNIC.
|
||||
Configallowedaddr string `json:"configureAllowedAddress,omitempty"`
|
||||
// The value of the optional default router.
|
||||
Defrouter string `json:"defrouter,omitempty"`
|
||||
// Enable one or more types of link protection.
|
||||
Linkprotection string `json:"linkProtection,omitempty"`
|
||||
// Set the VNIC's macAddress
|
||||
Macaddress string `json:"macAddress,omitempty"`
|
||||
}
|
||||
|
||||
// Windows defines the runtime configuration for Windows based containers, including Hyper-V containers.
|
||||
type Windows struct {
|
||||
// LayerFolders contains a list of absolute paths to directories containing image layers.
|
||||
LayerFolders []string `json:"layerFolders"`
|
||||
// Devices are the list of devices to be mapped into the container.
|
||||
Devices []WindowsDevice `json:"devices,omitempty"`
|
||||
// Resources contains information for handling resource constraints for the container.
|
||||
Resources *WindowsResources `json:"resources,omitempty"`
|
||||
// CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification.
|
||||
CredentialSpec interface{} `json:"credentialSpec,omitempty"`
|
||||
// Servicing indicates if the container is being started in a mode to apply a Windows Update servicing operation.
|
||||
Servicing bool `json:"servicing,omitempty"`
|
||||
// IgnoreFlushesDuringBoot indicates if the container is being started in a mode where disk writes are not flushed during its boot process.
|
||||
IgnoreFlushesDuringBoot bool `json:"ignoreFlushesDuringBoot,omitempty"`
|
||||
// HyperV contains information for running a container with Hyper-V isolation.
|
||||
HyperV *WindowsHyperV `json:"hyperv,omitempty"`
|
||||
// Network restriction configuration.
|
||||
Network *WindowsNetwork `json:"network,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsDevice represents information about a host device to be mapped into the container.
|
||||
type WindowsDevice struct {
|
||||
// Device identifier: interface class GUID, etc.
|
||||
ID string `json:"id"`
|
||||
// Device identifier type: "class", etc.
|
||||
IDType string `json:"idType"`
|
||||
}
|
||||
|
||||
// WindowsResources has container runtime resource constraints for containers running on Windows.
|
||||
type WindowsResources struct {
|
||||
// Memory restriction configuration.
|
||||
Memory *WindowsMemoryResources `json:"memory,omitempty"`
|
||||
// CPU resource restriction configuration.
|
||||
CPU *WindowsCPUResources `json:"cpu,omitempty"`
|
||||
// Storage restriction configuration.
|
||||
Storage *WindowsStorageResources `json:"storage,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsMemoryResources contains memory resource management settings.
|
||||
type WindowsMemoryResources struct {
|
||||
// Memory limit in bytes.
|
||||
Limit *uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsCPUResources contains CPU resource management settings.
|
||||
type WindowsCPUResources struct {
|
||||
// Count is the number of CPUs available to the container. It represents the
|
||||
// fraction of the configured processor `count` in a container in relation
|
||||
// to the processors available in the host. The fraction ultimately
|
||||
// determines the portion of processor cycles that the threads in a
|
||||
// container can use during each scheduling interval, as the number of
|
||||
// cycles per 10,000 cycles.
|
||||
Count *uint64 `json:"count,omitempty"`
|
||||
// Shares limits the share of processor time given to the container relative
|
||||
// to other workloads on the processor. The processor `shares` (`weight` at
|
||||
// the platform level) is a value between 0 and 10000.
|
||||
Shares *uint16 `json:"shares,omitempty"`
|
||||
// Maximum determines the portion of processor cycles that the threads in a
|
||||
// container can use during each scheduling interval, as the number of
|
||||
// cycles per 10,000 cycles. Set processor `maximum` to a percentage times
|
||||
// 100.
|
||||
Maximum *uint16 `json:"maximum,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsStorageResources contains storage resource management settings.
|
||||
type WindowsStorageResources struct {
|
||||
// Specifies maximum Iops for the system drive.
|
||||
Iops *uint64 `json:"iops,omitempty"`
|
||||
// Specifies maximum bytes per second for the system drive.
|
||||
Bps *uint64 `json:"bps,omitempty"`
|
||||
// Sandbox size specifies the minimum size of the system drive in bytes.
|
||||
SandboxSize *uint64 `json:"sandboxSize,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsNetwork contains network settings for Windows containers.
|
||||
type WindowsNetwork struct {
|
||||
// List of HNS endpoints that the container should connect to.
|
||||
EndpointList []string `json:"endpointList,omitempty"`
|
||||
// Specifies if unqualified DNS name resolution is allowed.
|
||||
AllowUnqualifiedDNSQuery bool `json:"allowUnqualifiedDNSQuery,omitempty"`
|
||||
// Comma separated list of DNS suffixes to use for name resolution.
|
||||
DNSSearchList []string `json:"DNSSearchList,omitempty"`
|
||||
// Name (ID) of the container that we will share with the network stack.
|
||||
NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"`
|
||||
// name (ID) of the network namespace that will be used for the container.
|
||||
NetworkNamespace string `json:"networkNamespace,omitempty"`
|
||||
}
|
||||
|
||||
// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation.
|
||||
type WindowsHyperV struct {
|
||||
// UtilityVMPath is an optional path to the image used for the Utility VM.
|
||||
UtilityVMPath string `json:"utilityVMPath,omitempty"`
|
||||
}
|
||||
|
||||
// VM contains information for virtual-machine-based containers.
|
||||
type VM struct {
|
||||
// Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers.
|
||||
Hypervisor VMHypervisor `json:"hypervisor,omitempty"`
|
||||
// Kernel specifies kernel-related configuration for virtual-machine-based containers.
|
||||
Kernel VMKernel `json:"kernel"`
|
||||
// Image specifies guest image related configuration for virtual-machine-based containers.
|
||||
Image VMImage `json:"image,omitempty"`
|
||||
}
|
||||
|
||||
// VMHypervisor contains information about the hypervisor to use for a virtual machine.
|
||||
type VMHypervisor struct {
|
||||
// Path is the host path to the hypervisor used to manage the virtual machine.
|
||||
Path string `json:"path"`
|
||||
// Parameters specifies parameters to pass to the hypervisor.
|
||||
Parameters []string `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
// VMKernel contains information about the kernel to use for a virtual machine.
|
||||
type VMKernel struct {
|
||||
// Path is the host path to the kernel used to boot the virtual machine.
|
||||
Path string `json:"path"`
|
||||
// Parameters specifies parameters to pass to the kernel.
|
||||
Parameters []string `json:"parameters,omitempty"`
|
||||
// InitRD is the host path to an initial ramdisk to be used by the kernel.
|
||||
InitRD string `json:"initrd,omitempty"`
|
||||
}
|
||||
|
||||
// VMImage contains information about the virtual machine root image.
|
||||
type VMImage struct {
|
||||
// Path is the host path to the root image that the VM kernel would boot into.
|
||||
Path string `json:"path"`
|
||||
// Format is the root image format type (e.g. "qcow2", "raw", "vhd", etc).
|
||||
Format string `json:"format"`
|
||||
}
|
||||
|
||||
// LinuxSeccomp represents syscall restrictions
|
||||
type LinuxSeccomp struct {
|
||||
DefaultAction LinuxSeccompAction `json:"defaultAction"`
|
||||
DefaultErrnoRet *uint `json:"defaultErrnoRet,omitempty"`
|
||||
Architectures []Arch `json:"architectures,omitempty"`
|
||||
Flags []LinuxSeccompFlag `json:"flags,omitempty"`
|
||||
ListenerPath string `json:"listenerPath,omitempty"`
|
||||
ListenerMetadata string `json:"listenerMetadata,omitempty"`
|
||||
Syscalls []LinuxSyscall `json:"syscalls,omitempty"`
|
||||
}
|
||||
|
||||
// Arch used for additional architectures
|
||||
type Arch string
|
||||
|
||||
// LinuxSeccompFlag is a flag to pass to seccomp(2).
|
||||
type LinuxSeccompFlag string
|
||||
|
||||
const (
|
||||
// LinuxSeccompFlagLog is a seccomp flag to request all returned
|
||||
// actions except SECCOMP_RET_ALLOW to be logged. An administrator may
|
||||
// override this filter flag by preventing specific actions from being
|
||||
// logged via the /proc/sys/kernel/seccomp/actions_logged file. (since
|
||||
// Linux 4.14)
|
||||
LinuxSeccompFlagLog LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_LOG"
|
||||
|
||||
// LinuxSeccompFlagSpecAllow can be used to disable Speculative Store
|
||||
// Bypass mitigation. (since Linux 4.17)
|
||||
LinuxSeccompFlagSpecAllow LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_SPEC_ALLOW"
|
||||
|
||||
// LinuxSeccompFlagWaitKillableRecv can be used to switch to the wait
|
||||
// killable semantics. (since Linux 5.19)
|
||||
LinuxSeccompFlagWaitKillableRecv LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV"
|
||||
)
|
||||
|
||||
// Additional architectures permitted to be used for system calls
|
||||
// By default only the native architecture of the kernel is permitted
|
||||
const (
|
||||
ArchX86 Arch = "SCMP_ARCH_X86"
|
||||
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
|
||||
ArchX32 Arch = "SCMP_ARCH_X32"
|
||||
ArchARM Arch = "SCMP_ARCH_ARM"
|
||||
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
|
||||
ArchMIPS Arch = "SCMP_ARCH_MIPS"
|
||||
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
|
||||
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
|
||||
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
|
||||
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
|
||||
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
|
||||
ArchPPC Arch = "SCMP_ARCH_PPC"
|
||||
ArchPPC64 Arch = "SCMP_ARCH_PPC64"
|
||||
ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE"
|
||||
ArchS390 Arch = "SCMP_ARCH_S390"
|
||||
ArchS390X Arch = "SCMP_ARCH_S390X"
|
||||
ArchPARISC Arch = "SCMP_ARCH_PARISC"
|
||||
ArchPARISC64 Arch = "SCMP_ARCH_PARISC64"
|
||||
ArchRISCV64 Arch = "SCMP_ARCH_RISCV64"
|
||||
)
|
||||
|
||||
// LinuxSeccompAction taken upon Seccomp rule match
|
||||
type LinuxSeccompAction string
|
||||
|
||||
// Define actions for Seccomp rules
|
||||
const (
|
||||
ActKill LinuxSeccompAction = "SCMP_ACT_KILL"
|
||||
ActKillProcess LinuxSeccompAction = "SCMP_ACT_KILL_PROCESS"
|
||||
ActKillThread LinuxSeccompAction = "SCMP_ACT_KILL_THREAD"
|
||||
ActTrap LinuxSeccompAction = "SCMP_ACT_TRAP"
|
||||
ActErrno LinuxSeccompAction = "SCMP_ACT_ERRNO"
|
||||
ActTrace LinuxSeccompAction = "SCMP_ACT_TRACE"
|
||||
ActAllow LinuxSeccompAction = "SCMP_ACT_ALLOW"
|
||||
ActLog LinuxSeccompAction = "SCMP_ACT_LOG"
|
||||
ActNotify LinuxSeccompAction = "SCMP_ACT_NOTIFY"
|
||||
)
|
||||
|
||||
// LinuxSeccompOperator used to match syscall arguments in Seccomp
|
||||
type LinuxSeccompOperator string
|
||||
|
||||
// Define operators for syscall arguments in Seccomp
|
||||
const (
|
||||
OpNotEqual LinuxSeccompOperator = "SCMP_CMP_NE"
|
||||
OpLessThan LinuxSeccompOperator = "SCMP_CMP_LT"
|
||||
OpLessEqual LinuxSeccompOperator = "SCMP_CMP_LE"
|
||||
OpEqualTo LinuxSeccompOperator = "SCMP_CMP_EQ"
|
||||
OpGreaterEqual LinuxSeccompOperator = "SCMP_CMP_GE"
|
||||
OpGreaterThan LinuxSeccompOperator = "SCMP_CMP_GT"
|
||||
OpMaskedEqual LinuxSeccompOperator = "SCMP_CMP_MASKED_EQ"
|
||||
)
|
||||
|
||||
// LinuxSeccompArg used for matching specific syscall arguments in Seccomp
|
||||
type LinuxSeccompArg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"valueTwo,omitempty"`
|
||||
Op LinuxSeccompOperator `json:"op"`
|
||||
}
|
||||
|
||||
// LinuxSyscall is used to match a syscall in Seccomp
|
||||
type LinuxSyscall struct {
|
||||
Names []string `json:"names"`
|
||||
Action LinuxSeccompAction `json:"action"`
|
||||
ErrnoRet *uint `json:"errnoRet,omitempty"`
|
||||
Args []LinuxSeccompArg `json:"args,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA
|
||||
// features and flags enabling Intel RDT CMT and MBM features.
|
||||
// Intel RDT features are available in Linux 4.14 and newer kernel versions.
|
||||
type LinuxIntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// The unit of memory bandwidth is specified in "percentages" by
|
||||
// default, and in "MBps" if MBA Software Controller is enabled.
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
|
||||
// EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of
|
||||
// the last-level cache (LLC) occupancy for the container.
|
||||
EnableCMT bool `json:"enableCMT,omitempty"`
|
||||
|
||||
// EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of
|
||||
// total and local memory bandwidth for the container.
|
||||
EnableMBM bool `json:"enableMBM,omitempty"`
|
||||
}
|
||||
|
||||
// ZOS contains platform-specific configuration for z/OS based containers.
|
||||
type ZOS struct {
|
||||
// Devices are a list of device nodes that are created for the container
|
||||
Devices []ZOSDevice `json:"devices,omitempty"`
|
||||
}
|
||||
|
||||
// ZOSDevice represents the mknod information for a z/OS special device file
|
||||
type ZOSDevice struct {
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
// Device type, block, char, etc.
|
||||
Type string `json:"type"`
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
// FileMode permission bits for the device.
|
||||
FileMode *os.FileMode `json:"fileMode,omitempty"`
|
||||
// UID of the device.
|
||||
UID *uint32 `json:"uid,omitempty"`
|
||||
// Gid of the device.
|
||||
GID *uint32 `json:"gid,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler
|
||||
type LinuxSchedulerPolicy string
|
||||
|
||||
const (
|
||||
// SchedOther is the default scheduling policy
|
||||
SchedOther LinuxSchedulerPolicy = "SCHED_OTHER"
|
||||
// SchedFIFO is the First-In-First-Out scheduling policy
|
||||
SchedFIFO LinuxSchedulerPolicy = "SCHED_FIFO"
|
||||
// SchedRR is the Round-Robin scheduling policy
|
||||
SchedRR LinuxSchedulerPolicy = "SCHED_RR"
|
||||
// SchedBatch is the Batch scheduling policy
|
||||
SchedBatch LinuxSchedulerPolicy = "SCHED_BATCH"
|
||||
// SchedISO is the Isolation scheduling policy
|
||||
SchedISO LinuxSchedulerPolicy = "SCHED_ISO"
|
||||
// SchedIdle is the Idle scheduling policy
|
||||
SchedIdle LinuxSchedulerPolicy = "SCHED_IDLE"
|
||||
// SchedDeadline is the Deadline scheduling policy
|
||||
SchedDeadline LinuxSchedulerPolicy = "SCHED_DEADLINE"
|
||||
)
|
||||
|
||||
// LinuxSchedulerFlag represents the flags used by the Linux Scheduler.
|
||||
type LinuxSchedulerFlag string
|
||||
|
||||
const (
|
||||
// SchedFlagResetOnFork represents the reset on fork scheduling flag
|
||||
SchedFlagResetOnFork LinuxSchedulerFlag = "SCHED_FLAG_RESET_ON_FORK"
|
||||
// SchedFlagReclaim represents the reclaim scheduling flag
|
||||
SchedFlagReclaim LinuxSchedulerFlag = "SCHED_FLAG_RECLAIM"
|
||||
// SchedFlagDLOverrun represents the deadline overrun scheduling flag
|
||||
SchedFlagDLOverrun LinuxSchedulerFlag = "SCHED_FLAG_DL_OVERRUN"
|
||||
// SchedFlagKeepPolicy represents the keep policy scheduling flag
|
||||
SchedFlagKeepPolicy LinuxSchedulerFlag = "SCHED_FLAG_KEEP_POLICY"
|
||||
// SchedFlagKeepParams represents the keep parameters scheduling flag
|
||||
SchedFlagKeepParams LinuxSchedulerFlag = "SCHED_FLAG_KEEP_PARAMS"
|
||||
// SchedFlagUtilClampMin represents the utilization clamp minimum scheduling flag
|
||||
SchedFlagUtilClampMin LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MIN"
|
||||
// SchedFlagUtilClampMin represents the utilization clamp maximum scheduling flag
|
||||
SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX"
|
||||
)
|
56
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go
generated
vendored
Normal file
56
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/state.go
generated
vendored
Normal file
@ -0,0 +1,56 @@
|
||||
package specs
|
||||
|
||||
// ContainerState represents the state of a container.
|
||||
type ContainerState string
|
||||
|
||||
const (
|
||||
// StateCreating indicates that the container is being created
|
||||
StateCreating ContainerState = "creating"
|
||||
|
||||
// StateCreated indicates that the runtime has finished the create operation
|
||||
StateCreated ContainerState = "created"
|
||||
|
||||
// StateRunning indicates that the container process has executed the
|
||||
// user-specified program but has not exited
|
||||
StateRunning ContainerState = "running"
|
||||
|
||||
// StateStopped indicates that the container process has exited
|
||||
StateStopped ContainerState = "stopped"
|
||||
)
|
||||
|
||||
// State holds information about the runtime state of the container.
|
||||
type State struct {
|
||||
// Version is the version of the specification that is supported.
|
||||
Version string `json:"ociVersion"`
|
||||
// ID is the container ID
|
||||
ID string `json:"id"`
|
||||
// Status is the runtime status of the container.
|
||||
Status ContainerState `json:"status"`
|
||||
// Pid is the process ID for the container process.
|
||||
Pid int `json:"pid,omitempty"`
|
||||
// Bundle is the path to the container's bundle directory.
|
||||
Bundle string `json:"bundle"`
|
||||
// Annotations are key values associated with the container.
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
}
|
||||
|
||||
const (
|
||||
// SeccompFdName is the name of the seccomp notify file descriptor.
|
||||
SeccompFdName string = "seccompFd"
|
||||
)
|
||||
|
||||
// ContainerProcessState holds information about the state of a container process.
|
||||
type ContainerProcessState struct {
|
||||
// Version is the version of the specification that is supported.
|
||||
Version string `json:"ociVersion"`
|
||||
// Fds is a string array containing the names of the file descriptors passed.
|
||||
// The index of the name in this array corresponds to index of the file
|
||||
// descriptor in the `SCM_RIGHTS` array.
|
||||
Fds []string `json:"fds"`
|
||||
// Pid is the process ID as seen by the runtime.
|
||||
Pid int `json:"pid"`
|
||||
// Opaque metadata.
|
||||
Metadata string `json:"metadata,omitempty"`
|
||||
// State of the container.
|
||||
State State `json:"state"`
|
||||
}
|
18
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
generated
vendored
Normal file
18
e2e/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
package specs
|
||||
|
||||
import "fmt"
|
||||
|
||||
const (
|
||||
// VersionMajor is for an API incompatible changes
|
||||
VersionMajor = 1
|
||||
// VersionMinor is for functionality in a backwards-compatible manner
|
||||
VersionMinor = 2
|
||||
// VersionPatch is for backwards-compatible bug fixes
|
||||
VersionPatch = 0
|
||||
|
||||
// VersionDev indicates development branch. Releases will be empty string.
|
||||
VersionDev = ""
|
||||
)
|
||||
|
||||
// Version is the specification version that the package types support.
|
||||
var Version = fmt.Sprintf("%d.%d.%d%s", VersionMajor, VersionMinor, VersionPatch, VersionDev)
|
201
e2e/vendor/github.com/opencontainers/selinux/LICENSE
generated
vendored
Normal file
201
e2e/vendor/github.com/opencontainers/selinux/LICENSE
generated
vendored
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
13
e2e/vendor/github.com/opencontainers/selinux/go-selinux/doc.go
generated
vendored
Normal file
13
e2e/vendor/github.com/opencontainers/selinux/go-selinux/doc.go
generated
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
/*
|
||||
Package selinux provides a high-level interface for interacting with selinux.
|
||||
|
||||
Usage:
|
||||
|
||||
import "github.com/opencontainers/selinux/go-selinux"
|
||||
|
||||
// Ensure that selinux is enforcing mode.
|
||||
if selinux.EnforceMode() != selinux.Enforcing {
|
||||
selinux.SetEnforceMode(selinux.Enforcing)
|
||||
}
|
||||
*/
|
||||
package selinux
|
115
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go
generated
vendored
Normal file
115
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go
generated
vendored
Normal file
@ -0,0 +1,115 @@
|
||||
package label
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
)
|
||||
|
||||
// Deprecated: use selinux.ROFileLabel
|
||||
var ROMountLabel = selinux.ROFileLabel
|
||||
|
||||
// SetProcessLabel takes a process label and tells the kernel to assign the
|
||||
// label to the next program executed by the current process.
|
||||
// Deprecated: use selinux.SetExecLabel
|
||||
var SetProcessLabel = selinux.SetExecLabel
|
||||
|
||||
// ProcessLabel returns the process label that the kernel will assign
|
||||
// to the next program executed by the current process. If "" is returned
|
||||
// this indicates that the default labeling will happen for the process.
|
||||
// Deprecated: use selinux.ExecLabel
|
||||
var ProcessLabel = selinux.ExecLabel
|
||||
|
||||
// SetSocketLabel takes a process label and tells the kernel to assign the
|
||||
// label to the next socket that gets created
|
||||
// Deprecated: use selinux.SetSocketLabel
|
||||
var SetSocketLabel = selinux.SetSocketLabel
|
||||
|
||||
// SocketLabel retrieves the current default socket label setting
|
||||
// Deprecated: use selinux.SocketLabel
|
||||
var SocketLabel = selinux.SocketLabel
|
||||
|
||||
// SetKeyLabel takes a process label and tells the kernel to assign the
|
||||
// label to the next kernel keyring that gets created
|
||||
// Deprecated: use selinux.SetKeyLabel
|
||||
var SetKeyLabel = selinux.SetKeyLabel
|
||||
|
||||
// KeyLabel retrieves the current default kernel keyring label setting
|
||||
// Deprecated: use selinux.KeyLabel
|
||||
var KeyLabel = selinux.KeyLabel
|
||||
|
||||
// FileLabel returns the label for specified path
|
||||
// Deprecated: use selinux.FileLabel
|
||||
var FileLabel = selinux.FileLabel
|
||||
|
||||
// PidLabel will return the label of the process running with the specified pid
|
||||
// Deprecated: use selinux.PidLabel
|
||||
var PidLabel = selinux.PidLabel
|
||||
|
||||
// Init initialises the labeling system
|
||||
func Init() {
|
||||
_ = selinux.GetEnabled()
|
||||
}
|
||||
|
||||
// ClearLabels will clear all reserved labels
|
||||
// Deprecated: use selinux.ClearLabels
|
||||
var ClearLabels = selinux.ClearLabels
|
||||
|
||||
// ReserveLabel will record the fact that the MCS label has already been used.
|
||||
// This will prevent InitLabels from using the MCS label in a newly created
|
||||
// container
|
||||
// Deprecated: use selinux.ReserveLabel
|
||||
func ReserveLabel(label string) error {
|
||||
selinux.ReserveLabel(label)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReleaseLabel will remove the reservation of the MCS label.
|
||||
// This will allow InitLabels to use the MCS label in a newly created
|
||||
// containers
|
||||
// Deprecated: use selinux.ReleaseLabel
|
||||
func ReleaseLabel(label string) error {
|
||||
selinux.ReleaseLabel(label)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DupSecOpt takes a process label and returns security options that
|
||||
// can be used to set duplicate labels on future container processes
|
||||
// Deprecated: use selinux.DupSecOpt
|
||||
var DupSecOpt = selinux.DupSecOpt
|
||||
|
||||
// FormatMountLabel returns a string to be used by the mount command. Using
|
||||
// the SELinux `context` mount option. Changing labels of files on mount
|
||||
// points with this option can never be changed.
|
||||
// FormatMountLabel returns a string to be used by the mount command.
|
||||
// The format of this string will be used to alter the labeling of the mountpoint.
|
||||
// The string returned is suitable to be used as the options field of the mount command.
|
||||
// If you need to have additional mount point options, you can pass them in as
|
||||
// the first parameter. Second parameter is the label that you wish to apply
|
||||
// to all content in the mount point.
|
||||
func FormatMountLabel(src, mountLabel string) string {
|
||||
return FormatMountLabelByType(src, mountLabel, "context")
|
||||
}
|
||||
|
||||
// FormatMountLabelByType returns a string to be used by the mount command.
|
||||
// Allow caller to specify the mount options. For example using the SELinux
|
||||
// `fscontext` mount option would allow certain container processes to change
|
||||
// labels of files created on the mount points, where as `context` option does
|
||||
// not.
|
||||
// FormatMountLabelByType returns a string to be used by the mount command.
|
||||
// The format of this string will be used to alter the labeling of the mountpoint.
|
||||
// The string returned is suitable to be used as the options field of the mount command.
|
||||
// If you need to have additional mount point options, you can pass them in as
|
||||
// the first parameter. Second parameter is the label that you wish to apply
|
||||
// to all content in the mount point.
|
||||
func FormatMountLabelByType(src, mountLabel, contextType string) string {
|
||||
if mountLabel != "" {
|
||||
switch src {
|
||||
case "":
|
||||
src = fmt.Sprintf("%s=%q", contextType, mountLabel)
|
||||
default:
|
||||
src = fmt.Sprintf("%s,%s=%q", src, contextType, mountLabel)
|
||||
}
|
||||
}
|
||||
return src
|
||||
}
|
147
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go
generated
vendored
Normal file
147
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go
generated
vendored
Normal file
@ -0,0 +1,147 @@
|
||||
package label
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
)
|
||||
|
||||
// Valid Label Options
|
||||
var validOptions = map[string]bool{
|
||||
"disable": true,
|
||||
"type": true,
|
||||
"filetype": true,
|
||||
"user": true,
|
||||
"role": true,
|
||||
"level": true,
|
||||
}
|
||||
|
||||
var ErrIncompatibleLabel = errors.New("Bad SELinux option z and Z can not be used together")
|
||||
|
||||
// InitLabels returns the process label and file labels to be used within
|
||||
// the container. A list of options can be passed into this function to alter
|
||||
// the labels. The labels returned will include a random MCS String, that is
|
||||
// guaranteed to be unique.
|
||||
// If the disabled flag is passed in, the process label will not be set, but the mount label will be set
|
||||
// to the container_file label with the maximum category. This label is not usable by any confined label.
|
||||
func InitLabels(options []string) (plabel string, mlabel string, retErr error) {
|
||||
if !selinux.GetEnabled() {
|
||||
return "", "", nil
|
||||
}
|
||||
processLabel, mountLabel := selinux.ContainerLabels()
|
||||
if processLabel != "" {
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
selinux.ReleaseLabel(mountLabel)
|
||||
}
|
||||
}()
|
||||
pcon, err := selinux.NewContext(processLabel)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
mcsLevel := pcon["level"]
|
||||
mcon, err := selinux.NewContext(mountLabel)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
for _, opt := range options {
|
||||
if opt == "disable" {
|
||||
selinux.ReleaseLabel(mountLabel)
|
||||
return "", selinux.PrivContainerMountLabel(), nil
|
||||
}
|
||||
if i := strings.Index(opt, ":"); i == -1 {
|
||||
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt)
|
||||
}
|
||||
con := strings.SplitN(opt, ":", 2)
|
||||
if !validOptions[con[0]] {
|
||||
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0])
|
||||
}
|
||||
if con[0] == "filetype" {
|
||||
mcon["type"] = con[1]
|
||||
continue
|
||||
}
|
||||
pcon[con[0]] = con[1]
|
||||
if con[0] == "level" || con[0] == "user" {
|
||||
mcon[con[0]] = con[1]
|
||||
}
|
||||
}
|
||||
if pcon.Get() != processLabel {
|
||||
if pcon["level"] != mcsLevel {
|
||||
selinux.ReleaseLabel(processLabel)
|
||||
}
|
||||
processLabel = pcon.Get()
|
||||
selinux.ReserveLabel(processLabel)
|
||||
}
|
||||
mountLabel = mcon.Get()
|
||||
}
|
||||
return processLabel, mountLabel, nil
|
||||
}
|
||||
|
||||
// Deprecated: The GenLabels function is only to be used during the transition
|
||||
// to the official API. Use InitLabels(strings.Fields(options)) instead.
|
||||
func GenLabels(options string) (string, string, error) {
|
||||
return InitLabels(strings.Fields(options))
|
||||
}
|
||||
|
||||
// SetFileLabel modifies the "path" label to the specified file label
|
||||
func SetFileLabel(path string, fileLabel string) error {
|
||||
if !selinux.GetEnabled() || fileLabel == "" {
|
||||
return nil
|
||||
}
|
||||
return selinux.SetFileLabel(path, fileLabel)
|
||||
}
|
||||
|
||||
// SetFileCreateLabel tells the kernel the label for all files to be created
|
||||
func SetFileCreateLabel(fileLabel string) error {
|
||||
if !selinux.GetEnabled() {
|
||||
return nil
|
||||
}
|
||||
return selinux.SetFSCreateLabel(fileLabel)
|
||||
}
|
||||
|
||||
// Relabel changes the label of path and all the entries beneath the path.
|
||||
// It changes the MCS label to s0 if shared is true.
|
||||
// This will allow all containers to share the content.
|
||||
//
|
||||
// The path itself is guaranteed to be relabeled last.
|
||||
func Relabel(path string, fileLabel string, shared bool) error {
|
||||
if !selinux.GetEnabled() || fileLabel == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
if shared {
|
||||
c, err := selinux.NewContext(fileLabel)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c["level"] = "s0"
|
||||
fileLabel = c.Get()
|
||||
}
|
||||
return selinux.Chcon(path, fileLabel, true)
|
||||
}
|
||||
|
||||
// DisableSecOpt returns a security opt that can disable labeling
|
||||
// support for future container processes
|
||||
// Deprecated: use selinux.DisableSecOpt
|
||||
var DisableSecOpt = selinux.DisableSecOpt
|
||||
|
||||
// Validate checks that the label does not include unexpected options
|
||||
func Validate(label string) error {
|
||||
if strings.Contains(label, "z") && strings.Contains(label, "Z") {
|
||||
return ErrIncompatibleLabel
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RelabelNeeded checks whether the user requested a relabel
|
||||
func RelabelNeeded(label string) bool {
|
||||
return strings.Contains(label, "z") || strings.Contains(label, "Z")
|
||||
}
|
||||
|
||||
// IsShared checks that the label includes a "shared" mark
|
||||
func IsShared(label string) bool {
|
||||
return strings.Contains(label, "z")
|
||||
}
|
50
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go
generated
vendored
Normal file
50
e2e/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go
generated
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package label
|
||||
|
||||
// InitLabels returns the process label and file labels to be used within
|
||||
// the container. A list of options can be passed into this function to alter
|
||||
// the labels.
|
||||
func InitLabels([]string) (string, string, error) {
|
||||
return "", "", nil
|
||||
}
|
||||
|
||||
// Deprecated: The GenLabels function is only to be used during the transition
|
||||
// to the official API. Use InitLabels(strings.Fields(options)) instead.
|
||||
func GenLabels(string) (string, string, error) {
|
||||
return "", "", nil
|
||||
}
|
||||
|
||||
func SetFileLabel(string, string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func SetFileCreateLabel(string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func Relabel(string, string, bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// DisableSecOpt returns a security opt that can disable labeling
|
||||
// support for future container processes
|
||||
func DisableSecOpt() []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate checks that the label does not include unexpected options
|
||||
func Validate(string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RelabelNeeded checks whether the user requested a relabel
|
||||
func RelabelNeeded(string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsShared checks that the label includes a "shared" mark
|
||||
func IsShared(string) bool {
|
||||
return false
|
||||
}
|
314
e2e/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go
generated
vendored
Normal file
314
e2e/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go
generated
vendored
Normal file
@ -0,0 +1,314 @@
|
||||
package selinux
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
const (
|
||||
// Enforcing constant indicate SELinux is in enforcing mode
|
||||
Enforcing = 1
|
||||
// Permissive constant to indicate SELinux is in permissive mode
|
||||
Permissive = 0
|
||||
// Disabled constant to indicate SELinux is disabled
|
||||
Disabled = -1
|
||||
// maxCategory is the maximum number of categories used within containers
|
||||
maxCategory = 1024
|
||||
// DefaultCategoryRange is the upper bound on the category range
|
||||
DefaultCategoryRange = uint32(maxCategory)
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS.
|
||||
ErrMCSAlreadyExists = errors.New("MCS label already exists")
|
||||
// ErrEmptyPath is returned when an empty path has been specified.
|
||||
ErrEmptyPath = errors.New("empty path")
|
||||
|
||||
// ErrInvalidLabel is returned when an invalid label is specified.
|
||||
ErrInvalidLabel = errors.New("invalid Label")
|
||||
|
||||
// InvalidLabel is returned when an invalid label is specified.
|
||||
//
|
||||
// Deprecated: use [ErrInvalidLabel].
|
||||
InvalidLabel = ErrInvalidLabel
|
||||
|
||||
// ErrIncomparable is returned two levels are not comparable
|
||||
ErrIncomparable = errors.New("incomparable levels")
|
||||
// ErrLevelSyntax is returned when a sensitivity or category do not have correct syntax in a level
|
||||
ErrLevelSyntax = errors.New("invalid level syntax")
|
||||
|
||||
// ErrContextMissing is returned if a requested context is not found in a file.
|
||||
ErrContextMissing = errors.New("context does not have a match")
|
||||
// ErrVerifierNil is returned when a context verifier function is nil.
|
||||
ErrVerifierNil = errors.New("verifier function is nil")
|
||||
|
||||
// CategoryRange allows the upper bound on the category range to be adjusted
|
||||
CategoryRange = DefaultCategoryRange
|
||||
|
||||
privContainerMountLabel string
|
||||
)
|
||||
|
||||
// Context is a representation of the SELinux label broken into 4 parts
|
||||
type Context map[string]string
|
||||
|
||||
// SetDisabled disables SELinux support for the package
|
||||
func SetDisabled() {
|
||||
setDisabled()
|
||||
}
|
||||
|
||||
// GetEnabled returns whether SELinux is currently enabled.
|
||||
func GetEnabled() bool {
|
||||
return getEnabled()
|
||||
}
|
||||
|
||||
// ClassIndex returns the int index for an object class in the loaded policy,
|
||||
// or -1 and an error
|
||||
func ClassIndex(class string) (int, error) {
|
||||
return classIndex(class)
|
||||
}
|
||||
|
||||
// SetFileLabel sets the SELinux label for this path, following symlinks,
|
||||
// or returns an error.
|
||||
func SetFileLabel(fpath string, label string) error {
|
||||
return setFileLabel(fpath, label)
|
||||
}
|
||||
|
||||
// LsetFileLabel sets the SELinux label for this path, not following symlinks,
|
||||
// or returns an error.
|
||||
func LsetFileLabel(fpath string, label string) error {
|
||||
return lSetFileLabel(fpath, label)
|
||||
}
|
||||
|
||||
// FileLabel returns the SELinux label for this path, following symlinks,
|
||||
// or returns an error.
|
||||
func FileLabel(fpath string) (string, error) {
|
||||
return fileLabel(fpath)
|
||||
}
|
||||
|
||||
// LfileLabel returns the SELinux label for this path, not following symlinks,
|
||||
// or returns an error.
|
||||
func LfileLabel(fpath string) (string, error) {
|
||||
return lFileLabel(fpath)
|
||||
}
|
||||
|
||||
// SetFSCreateLabel tells the kernel what label to use for all file system objects
|
||||
// created by this task.
|
||||
// Set the label to an empty string to return to the default label. Calls to SetFSCreateLabel
|
||||
// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until file system
|
||||
// objects created by this task are finished to guarantee another goroutine does not migrate
|
||||
// to the current thread before execution is complete.
|
||||
func SetFSCreateLabel(label string) error {
|
||||
return setFSCreateLabel(label)
|
||||
}
|
||||
|
||||
// FSCreateLabel returns the default label the kernel which the kernel is using
|
||||
// for file system objects created by this task. "" indicates default.
|
||||
func FSCreateLabel() (string, error) {
|
||||
return fsCreateLabel()
|
||||
}
|
||||
|
||||
// CurrentLabel returns the SELinux label of the current process thread, or an error.
|
||||
func CurrentLabel() (string, error) {
|
||||
return currentLabel()
|
||||
}
|
||||
|
||||
// PidLabel returns the SELinux label of the given pid, or an error.
|
||||
func PidLabel(pid int) (string, error) {
|
||||
return pidLabel(pid)
|
||||
}
|
||||
|
||||
// ExecLabel returns the SELinux label that the kernel will use for any programs
|
||||
// that are executed by the current process thread, or an error.
|
||||
func ExecLabel() (string, error) {
|
||||
return execLabel()
|
||||
}
|
||||
|
||||
// CanonicalizeContext takes a context string and writes it to the kernel
|
||||
// the function then returns the context that the kernel will use. Use this
|
||||
// function to check if two contexts are equivalent
|
||||
func CanonicalizeContext(val string) (string, error) {
|
||||
return canonicalizeContext(val)
|
||||
}
|
||||
|
||||
// ComputeCreateContext requests the type transition from source to target for
|
||||
// class from the kernel.
|
||||
func ComputeCreateContext(source string, target string, class string) (string, error) {
|
||||
return computeCreateContext(source, target, class)
|
||||
}
|
||||
|
||||
// CalculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound)
|
||||
// of a source and target range.
|
||||
// The glblub is calculated as the greater of the low sensitivities and
|
||||
// the lower of the high sensitivities and the and of each category bitset.
|
||||
func CalculateGlbLub(sourceRange, targetRange string) (string, error) {
|
||||
return calculateGlbLub(sourceRange, targetRange)
|
||||
}
|
||||
|
||||
// SetExecLabel sets the SELinux label that the kernel will use for any programs
|
||||
// that are executed by the current process thread, or an error. Calls to SetExecLabel
|
||||
// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until execution
|
||||
// of the program is finished to guarantee another goroutine does not migrate to the current
|
||||
// thread before execution is complete.
|
||||
func SetExecLabel(label string) error {
|
||||
return writeCon(attrPath("exec"), label)
|
||||
}
|
||||
|
||||
// SetTaskLabel sets the SELinux label for the current thread, or an error.
|
||||
// This requires the dyntransition permission. Calls to SetTaskLabel should
|
||||
// be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee
|
||||
// the current thread does not run in a new mislabeled thread.
|
||||
func SetTaskLabel(label string) error {
|
||||
return writeCon(attrPath("current"), label)
|
||||
}
|
||||
|
||||
// SetSocketLabel takes a process label and tells the kernel to assign the
|
||||
// label to the next socket that gets created. Calls to SetSocketLabel
|
||||
// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until
|
||||
// the socket is created to guarantee another goroutine does not migrate
|
||||
// to the current thread before execution is complete.
|
||||
func SetSocketLabel(label string) error {
|
||||
return writeCon(attrPath("sockcreate"), label)
|
||||
}
|
||||
|
||||
// SocketLabel retrieves the current socket label setting
|
||||
func SocketLabel() (string, error) {
|
||||
return readCon(attrPath("sockcreate"))
|
||||
}
|
||||
|
||||
// PeerLabel retrieves the label of the client on the other side of a socket
|
||||
func PeerLabel(fd uintptr) (string, error) {
|
||||
return peerLabel(fd)
|
||||
}
|
||||
|
||||
// SetKeyLabel takes a process label and tells the kernel to assign the
|
||||
// label to the next kernel keyring that gets created. Calls to SetKeyLabel
|
||||
// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until
|
||||
// the kernel keyring is created to guarantee another goroutine does not migrate
|
||||
// to the current thread before execution is complete.
|
||||
func SetKeyLabel(label string) error {
|
||||
return setKeyLabel(label)
|
||||
}
|
||||
|
||||
// KeyLabel retrieves the current kernel keyring label setting
|
||||
func KeyLabel() (string, error) {
|
||||
return readCon("/proc/self/attr/keycreate")
|
||||
}
|
||||
|
||||
// Get returns the Context as a string
|
||||
func (c Context) Get() string {
|
||||
return c.get()
|
||||
}
|
||||
|
||||
// NewContext creates a new Context struct from the specified label
|
||||
func NewContext(label string) (Context, error) {
|
||||
return newContext(label)
|
||||
}
|
||||
|
||||
// ClearLabels clears all reserved labels
|
||||
func ClearLabels() {
|
||||
clearLabels()
|
||||
}
|
||||
|
||||
// ReserveLabel reserves the MLS/MCS level component of the specified label
|
||||
func ReserveLabel(label string) {
|
||||
reserveLabel(label)
|
||||
}
|
||||
|
||||
// MLSEnabled checks if MLS is enabled.
|
||||
func MLSEnabled() bool {
|
||||
return isMLSEnabled()
|
||||
}
|
||||
|
||||
// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled
|
||||
func EnforceMode() int {
|
||||
return enforceMode()
|
||||
}
|
||||
|
||||
// SetEnforceMode sets the current SELinux mode Enforcing, Permissive.
|
||||
// Disabled is not valid, since this needs to be set at boot time.
|
||||
func SetEnforceMode(mode int) error {
|
||||
return setEnforceMode(mode)
|
||||
}
|
||||
|
||||
// DefaultEnforceMode returns the systems default SELinux mode Enforcing,
|
||||
// Permissive or Disabled. Note this is just the default at boot time.
|
||||
// EnforceMode tells you the systems current mode.
|
||||
func DefaultEnforceMode() int {
|
||||
return defaultEnforceMode()
|
||||
}
|
||||
|
||||
// ReleaseLabel un-reserves the MLS/MCS Level field of the specified label,
|
||||
// allowing it to be used by another process.
|
||||
func ReleaseLabel(label string) {
|
||||
releaseLabel(label)
|
||||
}
|
||||
|
||||
// ROFileLabel returns the specified SELinux readonly file label
|
||||
func ROFileLabel() string {
|
||||
return roFileLabel()
|
||||
}
|
||||
|
||||
// KVMContainerLabels returns the default processLabel and mountLabel to be used
|
||||
// for kvm containers by the calling process.
|
||||
func KVMContainerLabels() (string, string) {
|
||||
return kvmContainerLabels()
|
||||
}
|
||||
|
||||
// InitContainerLabels returns the default processLabel and file labels to be
|
||||
// used for containers running an init system like systemd by the calling process.
|
||||
func InitContainerLabels() (string, string) {
|
||||
return initContainerLabels()
|
||||
}
|
||||
|
||||
// ContainerLabels returns an allocated processLabel and fileLabel to be used for
|
||||
// container labeling by the calling process.
|
||||
func ContainerLabels() (processLabel string, fileLabel string) {
|
||||
return containerLabels()
|
||||
}
|
||||
|
||||
// SecurityCheckContext validates that the SELinux label is understood by the kernel
|
||||
func SecurityCheckContext(val string) error {
|
||||
return securityCheckContext(val)
|
||||
}
|
||||
|
||||
// CopyLevel returns a label with the MLS/MCS level from src label replaced on
|
||||
// the dest label.
|
||||
func CopyLevel(src, dest string) (string, error) {
|
||||
return copyLevel(src, dest)
|
||||
}
|
||||
|
||||
// Chcon changes the fpath file object to the SELinux label.
|
||||
// If fpath is a directory and recurse is true, then Chcon walks the
|
||||
// directory tree setting the label.
|
||||
//
|
||||
// The fpath itself is guaranteed to be relabeled last.
|
||||
func Chcon(fpath string, label string, recurse bool) error {
|
||||
return chcon(fpath, label, recurse)
|
||||
}
|
||||
|
||||
// DupSecOpt takes an SELinux process label and returns security options that
|
||||
// can be used to set the SELinux Type and Level for future container processes.
|
||||
func DupSecOpt(src string) ([]string, error) {
|
||||
return dupSecOpt(src)
|
||||
}
|
||||
|
||||
// DisableSecOpt returns a security opt that can be used to disable SELinux
|
||||
// labeling support for future container processes.
|
||||
func DisableSecOpt() []string {
|
||||
return []string{"disable"}
|
||||
}
|
||||
|
||||
// GetDefaultContextWithLevel gets a single context for the specified SELinux user
|
||||
// identity that is reachable from the specified scon context. The context is based
|
||||
// on the per-user /etc/selinux/{SELINUXTYPE}/contexts/users/<username> if it exists,
|
||||
// and falls back to the global /etc/selinux/{SELINUXTYPE}/contexts/default_contexts
|
||||
// file.
|
||||
func GetDefaultContextWithLevel(user, level, scon string) (string, error) {
|
||||
return getDefaultContextWithLevel(user, level, scon)
|
||||
}
|
||||
|
||||
// PrivContainerMountLabel returns mount label for privileged containers
|
||||
func PrivContainerMountLabel() string {
|
||||
// Make sure label is initialized.
|
||||
_ = label("")
|
||||
return privContainerMountLabel
|
||||
}
|
1296
e2e/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go
generated
vendored
Normal file
1296
e2e/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user