diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2022-02-19 15:24:12 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-02-19 15:24:12 +0000 |
commit | 242f2cae782d433d69d195e14564b6437ec9f7e6 (patch) | |
tree | 637e34022e9979c74bdd570d3b9a58f14db76fc9 | |
parent | 439346ce8fa32095433a9abb2aa3564d11283372 (diff) | |
parent | 45b04ef48dbbeb18d93c2631bf5584ac493de749 (diff) |
Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20220217b' into staging
V3: virtiofs pull 2022-02-17
Security label improvements from Vivek
- includes a fix for building against new kernel headers
[V3: checkpatch style fixes]
[V2: Fix building on old Linux]
Blocking flock disable from Sebastian
SYNCFS support from Greg
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
# gpg: Signature made Thu 17 Feb 2022 17:24:25 GMT
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7
* remotes/dgilbert-gitlab/tags/pull-virtiofs-20220217b:
virtiofsd: Add basic support for FUSE_SYNCFS request
virtiofsd: Add an option to enable/disable security label
virtiofsd: Create new file using O_TMPFILE and set security context
virtiofsd: Create new file with security context
virtiofsd: Add helpers to work with /proc/self/task/tid/attr/fscreate
virtiofsd: Move core file creation code in separate function
virtiofsd, fuse_lowlevel.c: Add capability to parse security context
virtiofsd: Extend size of fuse_conn_info->capable and ->want fields
virtiofsd: Parse extended "struct fuse_init_in"
linux-headers: Update headers to v5.17-rc1
virtiofsd: Fix breakage due to fuse_init_in size change
virtiofsd: Do not support blocking flock
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
34 files changed, 1123 insertions, 132 deletions
diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 07ac0be551..0c0560203c 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -104,6 +104,13 @@ Options * posix_acl|no_posix_acl - Enable/disable posix acl support. Posix ACLs are disabled by default. + * security_label|no_security_label - + Enable/disable security label support. Security labels are disabled by + default. This will allow client to send a MAC label of file during + file creation. Typically this is expected to be SELinux security + label. Server will try to set that label on newly created file + atomically wherever possible. + .. option:: --socket-path=PATH Listen on vhost-user UNIX domain socket at PATH. @@ -348,6 +355,31 @@ client arguments or lists returned from the host. This stops the client seeing any 'security.' attributes on the server and stops it setting any. +SELinux support +--------------- +One can enable support for SELinux by running virtiofsd with option +"-o security_label". But this will try to save guest's security context +in xattr security.selinux on host and it might fail if host's SELinux +policy does not permit virtiofsd to do this operation. + +Hence, it is preferred to remap guest's "security.selinux" xattr to say +"trusted.virtiofs.security.selinux" on host. + +"-o xattrmap=:map:security.selinux:trusted.virtiofs.:" + +This will make sure that guest and host's SELinux xattrs on same file +remain separate and not interfere with each other. And will allow both +host and guest to implement their own separate SELinux policies. + +Setting trusted xattr on host requires CAP_SYS_ADMIN. So one will need +add this capability to daemon. + +"-o modcaps=+sys_admin" + +Giving CAP_SYS_ADMIN increases the risk on system. Now virtiofsd is more +powerful and if gets compromised, it can do lot of damage to host system. +So keep this trade-off in my mind while making a decision. + Examples -------- diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index 204cfb8640..f0235e58a1 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -8,6 +8,7 @@ * should be used to determine that a VM is running under KVM. */ #define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" /* This CPUID returns two feature bitmaps in eax, edx. Before enabling * a particular paravirtualization, the appropriate feature bit should diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 2c025cb4fe..4888f85f69 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -313,6 +313,13 @@ extern "C" { */ #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ +/* 2 plane YCbCr420. + * 3 10 bit components and 2 padding bits packed into 4 bytes. + * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian + * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian + */ +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ + /* 3 plane non-subsampled (444) YCbCr * 16 bits per component, but only 10 bits are used and 6 bits are padded * index 0: Y plane, [15:0] Y:x [10:6] little endian @@ -853,6 +860,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) * and UV. Some SAND-using hardware stores UV in a separate tiled * image from Y to reduce the column height, which is not supported * with these modifiers. + * + * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also + * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes + * wide, but as this is a 10 bpp format that translates to 96 pixels. */ #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 688eb8dc39..38d5a4cd6e 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -231,6 +231,7 @@ enum tunable_id { ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + ETHTOOL_TX_COPYBREAK_BUF_SIZE, /* * Add your fresh new tunable attribute above and remember to update * tunable_strings[] in net/ethtool/common.c diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index 23ea31708b..bda06258be 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -184,6 +184,16 @@ * * 7.34 * - add FUSE_SYNCFS + * + * 7.35 + * - add FOPEN_NOFLUSH + * + * 7.36 + * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag + * - add flags2 to fuse_init_in and fuse_init_out + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX */ #ifndef _LINUX_FUSE_H @@ -215,7 +225,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 34 +#define FUSE_KERNEL_MINOR_VERSION 36 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -286,12 +296,14 @@ struct fuse_file_lock { * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) +#define FOPEN_NOFLUSH (1 << 5) /** * INIT request/reply flags @@ -332,6 +344,11 @@ struct fuse_file_lock { * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in + * FUSE_INIT_EXT: extended fuse_init_in request + * FUSE_INIT_RESERVED: reserved, do not use + * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and + * mknod + * FUSE_HAS_INODE_DAX: use per inode DAX */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -363,6 +380,11 @@ struct fuse_file_lock { #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) #define FUSE_SETXATTR_EXT (1 << 29) +#define FUSE_INIT_EXT (1 << 30) +#define FUSE_INIT_RESERVED (1 << 31) +/* bits 32..63 get shifted down 32 bits into the flags2 field */ +#define FUSE_SECURITY_CTX (1ULL << 32) +#define FUSE_HAS_INODE_DAX (1ULL << 33) /** * CUSE INIT request/reply flags @@ -445,8 +467,10 @@ struct fuse_file_lock { * fuse_attr flags * * FUSE_ATTR_SUBMOUNT: Object is a submount root + * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags @@ -732,6 +756,8 @@ struct fuse_init_in { uint32_t minor; uint32_t max_readahead; uint32_t flags; + uint32_t flags2; + uint32_t unused[11]; }; #define FUSE_COMPAT_INIT_OUT_SIZE 8 @@ -748,7 +774,8 @@ struct fuse_init_out { uint32_t time_gran; uint16_t max_pages; uint16_t map_alignment; - uint32_t unused[8]; + uint32_t flags2; + uint32_t unused[7]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -856,9 +883,12 @@ struct fuse_dirent { char name[]; }; -#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -#define FUSE_DIRENT_ALIGN(x) \ +/* Align variable length records to 64bit boundary */ +#define FUSE_REC_ALIGN(x) \ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) @@ -975,4 +1005,26 @@ struct fuse_syncfs_in { uint64_t padding; }; +/* + * For each security context, send fuse_secctx with size of security context + * fuse_secctx will be followed by security context name and this in turn + * will be followed by actual context label. + * fuse_secctx, name, context + */ +struct fuse_secctx { + uint32_t size; + uint32_t padding; +}; + +/* + * Contains the information about how many fuse_secctx structures are being + * sent and what's the total size of all security contexts (including + * size of fuse_secctx_header). + * + */ +struct fuse_secctx_header { + uint32_t size; + uint32_t nr_secctx; +}; + #endif /* _LINUX_FUSE_H */ diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index ff6ccbc6ef..bee1a9ed6e 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -301,23 +301,23 @@ #define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ #define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ -/* Message Signalled Interrupt registers */ +/* Message Signaled Interrupt registers */ -#define PCI_MSI_FLAGS 2 /* Message Control */ +#define PCI_MSI_FLAGS 0x02 /* Message Control */ #define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ #define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ #define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ #define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ #define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ #define PCI_MSI_RFU 3 /* Rest of capability flags */ -#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ -#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ -#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ -#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ -#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ -#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ -#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ -#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ +#define PCI_MSI_ADDRESS_LO 0x04 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 0x08 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 0x08 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_MASK_32 0x0c /* Mask bits register for 32-bit devices */ +#define PCI_MSI_PENDING_32 0x10 /* Pending intrs for 32-bit devices */ +#define PCI_MSI_DATA_64 0x0c /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_64 0x10 /* Mask bits register for 64-bit devices */ +#define PCI_MSI_PENDING_64 0x14 /* Pending intrs for 64-bit devices */ /* MSI-X registers (in MSI-X capability) */ #define PCI_MSIX_FLAGS 2 /* Message Control */ @@ -335,10 +335,10 @@ /* MSI-X Table entry format (in memory mapped by a BAR) */ #define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ -#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ +#define PCI_MSIX_ENTRY_LOWER_ADDR 0x0 /* Message Address */ +#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ +#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ +#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ #define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* CompactPCI Hotswap Register */ @@ -470,7 +470,7 @@ /* PCI Express capability registers */ -#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS 0x02 /* Capabilities register */ #define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ #define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ #define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ @@ -484,7 +484,7 @@ #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ -#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */ #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ #define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ @@ -497,7 +497,7 @@ #define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ -#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL 0x08 /* Device Control */ #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ #define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ @@ -522,7 +522,7 @@ #define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ #define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ -#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA 0x0a /* Device Status */ #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ #define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ #define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ @@ -530,7 +530,7 @@ #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ -#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ @@ -549,7 +549,7 @@ #define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ #define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ -#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL 0x10 /* Link Control */ #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ #define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ #define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ @@ -562,7 +562,7 @@ #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ #define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ -#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_LNKSTA 0x12 /* Link Status */ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ @@ -582,7 +582,7 @@ #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ -#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */ #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ #define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ @@ -595,7 +595,7 @@ #define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ #define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ #define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ -#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL 0x18 /* Slot Control */ #define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ #define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ #define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ @@ -617,7 +617,7 @@ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ -#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA 0x1a /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ #define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ @@ -627,15 +627,15 @@ #define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ #define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ #define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ -#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL 0x1c /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ #define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ -#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ -#define PCI_EXP_RTSTA 32 /* Root Status */ +#define PCI_EXP_RTSTA 0x20 /* Root Status */ #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ /* @@ -646,7 +646,7 @@ * Use pcie_capability_read_word() and similar interfaces to use them * safely. */ -#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ @@ -658,7 +658,7 @@ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ -#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ @@ -670,9 +670,9 @@ #define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ -#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ -#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ -#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ +#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ +#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ @@ -680,7 +680,7 @@ #define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ #define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ -#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ +#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */ #define PCI_EXP_LNKCTL2_TLS 0x000f #define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ @@ -691,12 +691,12 @@ #define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ -#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ -#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ +#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ +#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ -#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ +#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ +#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */ /* Extended Capabilities (PCI-X 2.0 and Express) */ #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) @@ -742,7 +742,7 @@ #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 /* Advanced Error Reporting */ -#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */ #define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ #define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ #define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ @@ -760,11 +760,11 @@ #define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ #define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ #define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ -#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ +#define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */ /* Same bits as above */ -#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ +#define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */ /* Same bits as above */ -#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_STATUS 0x10 /* Correctable Error Status */ #define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ #define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ #define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ @@ -773,20 +773,20 @@ #define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ #define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ #define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ -#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ +#define PCI_ERR_COR_MASK 0x14 /* Correctable Error Mask */ /* Same bits as above */ -#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ -#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/ +#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */ #define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ #define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ -#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ -#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ #define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ #define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_STATUS 0x30 #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ @@ -795,52 +795,52 @@ #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ #define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ -#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ +#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */ /* Virtual Channel */ -#define PCI_VC_PORT_CAP1 4 +#define PCI_VC_PORT_CAP1 0x04 #define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ #define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ #define PCI_VC_CAP1_ARB_SIZE 0x00000c00 -#define PCI_VC_PORT_CAP2 8 +#define PCI_VC_PORT_CAP2 0x08 #define PCI_VC_CAP2_32_PHASE 0x00000002 #define PCI_VC_CAP2_64_PHASE 0x00000004 #define PCI_VC_CAP2_128_PHASE 0x00000008 #define PCI_VC_CAP2_ARB_OFF 0xff000000 -#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_CTRL 0x0c #define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 -#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_PORT_STATUS 0x0e #define PCI_VC_PORT_STATUS_TABLE 0x00000001 -#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CAP 0x10 #define PCI_VC_RES_CAP_32_PHASE 0x00000002 #define PCI_VC_RES_CAP_64_PHASE 0x00000004 #define PCI_VC_RES_CAP_128_PHASE 0x00000008 #define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 #define PCI_VC_RES_CAP_256_PHASE 0x00000020 #define PCI_VC_RES_CAP_ARB_OFF 0xff000000 -#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_CTRL 0x14 #define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 #define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 #define PCI_VC_RES_CTRL_ID 0x07000000 #define PCI_VC_RES_CTRL_ENABLE 0x80000000 -#define PCI_VC_RES_STATUS 26 +#define PCI_VC_RES_STATUS 0x1a #define PCI_VC_RES_STATUS_TABLE 0x00000001 #define PCI_VC_RES_STATUS_NEGO 0x00000002 #define PCI_CAP_VC_BASE_SIZEOF 0x10 -#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C +#define PCI_CAP_VC_PER_VC_SIZEOF 0x0c /* Power Budgeting */ -#define PCI_PWR_DSR 4 /* Data Select Register */ -#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DSR 0x04 /* Data Select Register */ +#define PCI_PWR_DATA 0x08 /* Data Register */ #define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ #define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ #define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ #define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ #define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ #define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ -#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP 0x0c /* Capability */ #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ -#define PCI_EXT_CAP_PWR_SIZEOF 16 +#define PCI_EXT_CAP_PWR_SIZEOF 0x10 /* Root Complex Event Collector Endpoint Association */ #define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ @@ -964,7 +964,7 @@ #define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ -#define PCI_EXT_CAP_SRIOV_SIZEOF 64 +#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40 #define PCI_LTR_MAX_SNOOP_LAT 0x4 #define PCI_LTR_MAX_NOSNOOP_LAT 0x6 @@ -1017,12 +1017,12 @@ #define PCI_TPH_LOC_NONE 0x000 /* no location */ #define PCI_TPH_LOC_CAP 0x200 /* in capability */ #define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ -#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ /* Downstream Port Containment */ -#define PCI_EXP_DPC_CAP 4 /* DPC Capability */ +#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ #define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */ #define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ #define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ @@ -1030,19 +1030,19 @@ #define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ -#define PCI_EXP_DPC_CTL 6 /* DPC control */ +#define PCI_EXP_DPC_CTL 0x06 /* DPC control */ #define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ -#define PCI_EXP_DPC_STATUS 8 /* DPC Status */ +#define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ -#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +#define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ #define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ #define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */ @@ -1086,7 +1086,11 @@ /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) +#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) +#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 20) & 0xfff) #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ +#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ diff --git a/include/standard-headers/linux/virtio_gpio.h b/include/standard-headers/linux/virtio_gpio.h new file mode 100644 index 0000000000..2b5cf06349 --- /dev/null +++ b/include/standard-headers/linux/virtio_gpio.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _LINUX_VIRTIO_GPIO_H +#define _LINUX_VIRTIO_GPIO_H + +#include "standard-headers/linux/types.h" + +/* Virtio GPIO Feature bits */ +#define VIRTIO_GPIO_F_IRQ 0 + +/* Virtio GPIO request types */ +#define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 +#define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 +#define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 +#define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 +#define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 +#define VIRTIO_GPIO_MSG_IRQ_TYPE 0x0006 + +/* Possible values of the status field */ +#define VIRTIO_GPIO_STATUS_OK 0x0 +#define VIRTIO_GPIO_STATUS_ERR 0x1 + +/* Direction types */ +#define VIRTIO_GPIO_DIRECTION_NONE 0x00 +#define VIRTIO_GPIO_DIRECTION_OUT 0x01 +#define VIRTIO_GPIO_DIRECTION_IN 0x02 + +/* Virtio GPIO IRQ types */ +#define VIRTIO_GPIO_IRQ_TYPE_NONE 0x00 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING 0x01 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING 0x02 +#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH 0x03 +#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH 0x04 +#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW 0x08 + +struct virtio_gpio_config { + uint16_t ngpio; + uint8_t padding[2]; + uint32_t gpio_names_size; +}; + +/* Virtio GPIO Request / Response */ +struct virtio_gpio_request { + uint16_t type; + uint16_t gpio; + uint32_t value; +}; + +struct virtio_gpio_response { + uint8_t status; + uint8_t value; +}; + +struct virtio_gpio_response_get_names { + uint8_t status; + uint8_t value[]; +}; + +/* Virtio GPIO IRQ Request / Response */ +struct virtio_gpio_irq_request { + uint16_t gpio; +}; + +struct virtio_gpio_irq_response { + uint8_t status; +}; + +/* Possible values of the interrupt status field */ +#define VIRTIO_GPIO_IRQ_STATUS_INVALID 0x0 +#define VIRTIO_GPIO_IRQ_STATUS_VALID 0x1 + +#endif /* _LINUX_VIRTIO_GPIO_H */ diff --git a/include/standard-headers/linux/virtio_i2c.h b/include/standard-headers/linux/virtio_i2c.h new file mode 100644 index 0000000000..09fa907793 --- /dev/null +++ b/include/standard-headers/linux/virtio_i2c.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ +/* + * Definitions for virtio I2C Adpter + * + * Copyright (c) 2021 Intel Corporation. All rights reserved. + */ + +#ifndef _LINUX_VIRTIO_I2C_H +#define _LINUX_VIRTIO_I2C_H + +#include "standard-headers/linux/const.h" +#include "standard-headers/linux/types.h" + +/* Virtio I2C Feature bits */ +#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 + +/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ +#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) + +/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ +#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) + +/** + * struct virtio_i2c_out_hdr - the virtio I2C message OUT header + * @addr: the controlled device address + * @padding: used to pad to full dword + * @flags: used for feature extensibility + */ +struct virtio_i2c_out_hdr { + uint16_t addr; + uint16_t padding; + uint32_t flags; +}; + +/** + * struct virtio_i2c_in_hdr - the virtio I2C message IN header + * @status: the processing result from the backend + */ +struct virtio_i2c_in_hdr { + uint8_t status; +}; + +/* The final status written by the device */ +#define VIRTIO_I2C_MSG_OK 0 +#define VIRTIO_I2C_MSG_ERR 1 + +#endif /* _LINUX_VIRTIO_I2C_H */ diff --git a/include/standard-headers/linux/virtio_iommu.h b/include/standard-headers/linux/virtio_iommu.h index b9443b83a1..366379c2f0 100644 --- a/include/standard-headers/linux/virtio_iommu.h +++ b/include/standard-headers/linux/virtio_iommu.h @@ -16,6 +16,7 @@ #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 #define VIRTIO_IOMMU_F_MMIO 5 +#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 struct virtio_iommu_range_64 { uint64_t start; @@ -36,6 +37,8 @@ struct virtio_iommu_config { struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ uint32_t probe_size; + uint8_t bypass; + uint8_t reserved[3]; }; /* Request types */ @@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { uint8_t reserved[3]; }; +#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) + struct virtio_iommu_req_attach { struct virtio_iommu_req_head head; uint32_t domain; uint32_t endpoint; - uint8_t reserved[8]; + uint32_t flags; + uint8_t reserved[4]; struct virtio_iommu_req_tail tail; }; diff --git a/include/standard-headers/linux/virtio_pcidev.h b/include/standard-headers/linux/virtio_pcidev.h new file mode 100644 index 0000000000..bdf1d062da --- /dev/null +++ b/include/standard-headers/linux/virtio_pcidev.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#ifndef _LINUX_VIRTIO_PCIDEV_H +#define _LINUX_VIRTIO_PCIDEV_H +#include "standard-headers/linux/types.h" + +/** + * enum virtio_pcidev_ops - virtual PCI device operations + * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors + * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; + * the @data field contains the data to write (in little endian). + * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable; + * the @data field should be filled in by the device (in little endian). + * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable; + * the @data field contains the data to write (in little endian). + * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but + * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) + * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for + * the number + * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports + * the 16- or 32-bit write that would otherwise be done into memory, + * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above + * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be + * all zeroes) to signal the PME# pin. + */ +enum virtio_pcidev_ops { + VIRTIO_PCIDEV_OP_RESERVED = 0, + VIRTIO_PCIDEV_OP_CFG_READ, + VIRTIO_PCIDEV_OP_CFG_WRITE, + VIRTIO_PCIDEV_OP_MMIO_READ, + VIRTIO_PCIDEV_OP_MMIO_WRITE, + VIRTIO_PCIDEV_OP_MMIO_MEMSET, + VIRTIO_PCIDEV_OP_INT, + VIRTIO_PCIDEV_OP_MSI, + VIRTIO_PCIDEV_OP_PME, +}; + +/** + * struct virtio_pcidev_msg - virtio PCI device operation + * @op: the operation to do + * @bar: the bar (only with BAR read/write messages) + * @reserved: reserved + * @size: the size of the read/write (in bytes) + * @addr: the address to read/write + * @data: the data, normally @size long, but just one byte for + * %VIRTIO_PCIDEV_OP_MMIO_MEMSET + * + * Note: the fields are all in native (CPU) endian, however, the + * @data values will often be in little endian (see the ops above.) + */ +struct virtio_pcidev_msg { + uint8_t op; + uint8_t bar; + uint16_t reserved; + uint32_t size; + uint64_t addr; + uint8_t data[]; +}; + +#endif /* _LINUX_VIRTIO_PCIDEV_H */ diff --git a/include/standard-headers/linux/virtio_scmi.h b/include/standard-headers/linux/virtio_scmi.h new file mode 100644 index 0000000000..8f2c305aea --- /dev/null +++ b/include/standard-headers/linux/virtio_scmi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Copyright (C) 2020-2021 OpenSynergy GmbH + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef _LINUX_VIRTIO_SCMI_H +#define _LINUX_VIRTIO_SCMI_H + +#include "standard-headers/linux/virtio_types.h" + +/* Device implements some SCMI notifications, or delayed responses. */ +#define VIRTIO_SCMI_F_P2A_CHANNELS 0 + +/* Device implements any SCMI statistics shared memory region */ +#define VIRTIO_SCMI_F_SHARED_MEMORY 1 + +/* Virtqueues */ + +#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */ +#define VIRTIO_SCMI_VQ_RX 1 /* eventq */ +#define VIRTIO_SCMI_VQ_MAX_CNT 2 + +#endif /* _LINUX_VIRTIO_SCMI_H */ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 4557a8b608..1c48b0ae3b 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) #define __NR_futex_waitv 449 __SYSCALL(__NR_futex_waitv, sys_futex_waitv) +#define __NR_set_mempolicy_home_node 450 +__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #undef __NR_syscalls -#define __NR_syscalls 450 +#define __NR_syscalls 451 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 4b3e7ad1ec..1f14a6fad3 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -377,5 +377,7 @@ #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) #define __NR_process_mrelease (__NR_Linux + 448) +#define __NR_futex_waitv (__NR_Linux + 449) +#define __NR_set_mempolicy_home_node (__NR_Linux + 450) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 488d9298d9..e5a8ebec78 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -353,5 +353,7 @@ #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) #define __NR_process_mrelease (__NR_Linux + 448) +#define __NR_futex_waitv (__NR_Linux + 449) +#define __NR_set_mempolicy_home_node (__NR_Linux + 450) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index f47399870a..871d57168f 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -423,5 +423,7 @@ #define __NR_landlock_add_rule (__NR_Linux + 445) #define __NR_landlock_restrict_self (__NR_Linux + 446) #define __NR_process_mrelease (__NR_Linux + 448) +#define __NR_futex_waitv (__NR_Linux + 449) +#define __NR_set_mempolicy_home_node (__NR_Linux + 450) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 11d54696dc..585c7fefbc 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -430,6 +430,8 @@ #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 #define __NR_process_mrelease 448 +#define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index cf740bab13..350f7ec0ac 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -402,6 +402,8 @@ #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 #define __NR_process_mrelease 448 +#define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h new file mode 100644 index 0000000000..cc5c45a9ce --- /dev/null +++ b/linux-headers/asm-riscv/bitsperlong.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Regents of the University of California + */ + +#ifndef _ASM_RISCV_BITSPERLONG_H +#define _ASM_RISCV_BITSPERLONG_H + +#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) + +#include <asm-generic/bitsperlong.h> + +#endif /* _ASM_RISCV_BITSPERLONG_H */ diff --git a/linux-headers/asm-riscv/mman.h b/linux-headers/asm-riscv/mman.h new file mode 100644 index 0000000000..8eebf89f5a --- /dev/null +++ b/linux-headers/asm-riscv/mman.h @@ -0,0 +1 @@ +#include <asm-generic/mman.h> diff --git a/linux-headers/asm-riscv/unistd.h b/linux-headers/asm-riscv/unistd.h new file mode 100644 index 0000000000..8062996c2d --- /dev/null +++ b/linux-headers/asm-riscv/unistd.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#ifdef __LP64__ +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SET_GET_RLIMIT +#endif /* __LP64__ */ + +#define __ARCH_WANT_SYS_CLONE3 + +#include <asm-generic/unistd.h> + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * __NR_riscv_flush_icache is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +#ifndef __NR_riscv_flush_icache +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +#endif +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index 8f97d98128..8e644d65f5 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -420,5 +420,7 @@ #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 #define __NR_process_mrelease 448 +#define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 021ffc30e6..51da542fec 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -368,5 +368,7 @@ #define __NR_landlock_add_rule 445 #define __NR_landlock_restrict_self 446 #define __NR_process_mrelease 448 +#define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 5a776a08f7..2da3316bb5 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -373,9 +373,23 @@ struct kvm_debugregs { __u64 reserved[9]; }; -/* for KVM_CAP_XSAVE */ +/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ struct kvm_xsave { + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. + */ __u32 region[1024]; + __u32 extra[0]; }; #define KVM_MAX_XCRS 16 diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 9c9ffe312b..87e1e977af 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -440,6 +440,7 @@ #define __NR_memfd_secret 447 #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 084f1eef9c..147a78d623 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -362,6 +362,7 @@ #define __NR_memfd_secret 447 #define __NR_process_mrelease 448 #define __NR_futex_waitv 449 +#define __NR_set_mempolicy_home_node 450 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index a2441affc2..27098db7fb 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -315,6 +315,7 @@ #define __NR_memfd_secret (__X32_SYSCALL_BIT + 447) #define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) +#define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 02c5e7b7bb..00af3bc333 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -1130,6 +1130,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 +#define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 #ifdef KVM_CAP_IRQ_ROUTING @@ -1161,11 +1164,20 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +struct kvm_irq_routing_xen_evtchn { + __u32 port; + __u32 vcpu; + __u32 priority; +}; + +#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1177,6 +1189,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; + struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1207,6 +1220,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) struct kvm_xen_hvm_config { __u32 flags; @@ -1609,6 +1623,9 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h index 0c2665b977..bf46954dab 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h @@ -378,6 +378,11 @@ struct fuse_file_info { #define FUSE_CAP_SETXATTR_EXT (1 << 29) /** + * Indicates that file server supports creating file security context + */ +#define FUSE_CAP_SECURITY_CTX (1ULL << 32) + +/** * Ioctl flags * * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine @@ -439,7 +444,7 @@ struct fuse_conn_info { /** * Capability flags that the kernel supports (read-only) */ - unsigned capable; + uint64_t capable; /** * Capability flags that the filesystem wants to enable. @@ -447,7 +452,7 @@ struct fuse_conn_info { * libfuse attempts to initialize this field with * reasonable default values before calling the init() handler. */ - unsigned want; + uint64_t want; /** * Maximum number of pending "background" requests. A diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h index 492e002181..a5572fa4ae 100644 --- a/tools/virtiofsd/fuse_i.h +++ b/tools/virtiofsd/fuse_i.h @@ -15,6 +15,12 @@ struct fv_VuDev; struct fv_QueueInfo; +struct fuse_security_context { + const char *name; + uint32_t ctxlen; + const void *ctx; +}; + struct fuse_req { struct fuse_session *se; uint64_t unique; @@ -35,6 +41,7 @@ struct fuse_req { } u; struct fuse_req *next; struct fuse_req *prev; + struct fuse_security_context secctx; }; struct fuse_notify_req { diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index e4679c73ab..752928741d 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -886,11 +886,63 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, } } +static int parse_secctx_fill_req(fuse_req_t req, struct fuse_mbuf_iter *iter) +{ + struct fuse_secctx_header *fsecctx_header; + struct fuse_secctx *fsecctx; + const void *secctx; + const char *name; + + fsecctx_header = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx_header)); + if (!fsecctx_header) { + return -EINVAL; + } + + /* + * As of now maximum of one security context is supported. It can + * change in future though. + */ + if (fsecctx_header->nr_secctx > 1) { + return -EINVAL; + } + + /* No security context sent. Maybe no LSM supports it */ + if (!fsecctx_header->nr_secctx) { + return 0; + } + + fsecctx = fuse_mbuf_iter_advance(iter, sizeof(*fsecctx)); + if (!fsecctx) { + return -EINVAL; + } + + /* struct fsecctx with zero sized context is not expected */ + if (!fsecctx->size) { + return -EINVAL; + } + name = fuse_mbuf_iter_advance_str(iter); + if (!name) { + return -EINVAL; + } + + secctx = fuse_mbuf_iter_advance(iter, fsecctx->size); + if (!secctx) { + return -EINVAL; + } + + req->secctx.name = name; + req->secctx.ctx = secctx; + req->secctx.ctxlen = fsecctx->size; + return 0; +} + static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, struct fuse_mbuf_iter *iter) { struct fuse_mknod_in *arg; const char *name; + bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX; + int err; arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); name = fuse_mbuf_iter_advance_str(iter); @@ -901,6 +953,14 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, req->ctx.umask = arg->umask; + if (secctx_enabled) { + err = parse_secctx_fill_req(req, iter); + if (err) { + fuse_reply_err(req, -err); + return; + } + } + if (req->se->op.mknod) { req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); } else { @@ -913,6 +973,8 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, { struct fuse_mkdir_in *arg; const char *name; + bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX; + int err; arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); name = fuse_mbuf_iter_advance_str(iter); @@ -923,6 +985,14 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, req->ctx.umask = arg->umask; + if (secctx_enabled) { + err = parse_secctx_fill_req(req, iter); + if (err) { + fuse_reply_err(req, err); + return; + } + } + if (req->se->op.mkdir) { req->se->op.mkdir(req, nodeid, name, arg->mode); } else { @@ -969,12 +1039,22 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, { const char *name = fuse_mbuf_iter_advance_str(iter); const char *linkname = fuse_mbuf_iter_advance_str(iter); + bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX; + int err; if (!name || !linkname) { fuse_reply_err(req, EINVAL); return; } + if (secctx_enabled) { + err = parse_secctx_fill_req(req, iter); + if (err) { + fuse_reply_err(req, err); + return; + } + } + if (req->se->op.symlink) { req->se->op.symlink(req, linkname, nodeid, name); } else { @@ -1048,6 +1128,8 @@ static void do_link(fuse_req_t req, fuse_ino_t nodeid, static void do_create(fuse_req_t req, fuse_ino_t nodeid, struct fuse_mbuf_iter *iter) { + bool secctx_enabled = req->se->conn.want & FUSE_CAP_SECURITY_CTX; + if (req->se->op.create) { struct fuse_create_in *arg; struct fuse_file_info fi; @@ -1060,6 +1142,15 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, return; } + if (secctx_enabled) { + int err; + err = parse_secctx_fill_req(req, iter); + if (err) { + fuse_reply_err(req, err); + return; + } + } + memset(&fi, 0, sizeof(fi)); fi.flags = arg->flags; fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID; @@ -1876,15 +1967,30 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, } } +static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_mbuf_iter *iter) +{ + if (req->se->op.syncfs) { + req->se->op.syncfs(req, nodeid); + } else { + fuse_reply_err(req, ENOSYS); + } +} + static void do_init(fuse_req_t req, fuse_ino_t nodeid, struct fuse_mbuf_iter *iter) { size_t compat_size = offsetof(struct fuse_init_in, max_readahead); + size_t compat2_size = offsetof(struct fuse_init_in, flags) + + sizeof(uint32_t); + /* Fuse structure extended with minor version 36 */ + size_t compat3_size = endof(struct fuse_init_in, unused); struct fuse_init_in *arg; struct fuse_init_out outarg; struct fuse_session *se = req->se; size_t bufsize = se->bufsize; size_t outargsize = sizeof(outarg); + uint64_t flags = 0; (void)nodeid; @@ -1897,15 +2003,29 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, /* ...and now consume the new fields. */ if (arg->major == 7 && arg->minor >= 6) { - if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { + if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) { + fuse_reply_err(req, EINVAL); + return; + } + flags |= arg->flags; + } + + /* + * fuse_init_in was extended again with minor version 36. Just read + * current known size of fuse_init so that future extension and + * header rebase does not cause breakage. + */ + if (sizeof(*arg) > compat2_size && (arg->flags & FUSE_INIT_EXT)) { + if (!fuse_mbuf_iter_advance(iter, compat3_size - compat2_size)) { fuse_reply_err(req, EINVAL); return; } + flags |= (uint64_t) arg->flags2 << 32; } fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); if (arg->major == 7 && arg->minor >= 6) { - fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); + fuse_log(FUSE_LOG_DEBUG, "flags=0x%016llx\n", flags); fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); } se->conn.proto_major = arg->major; @@ -1933,70 +2053,73 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (arg->max_readahead < se->conn.max_readahead) { se->conn.max_readahead = arg->max_readahead; } - if (arg->flags & FUSE_ASYNC_READ) { + if (flags & FUSE_ASYNC_READ) { se->conn.capable |= FUSE_CAP_ASYNC_READ; } - if (arg->flags & FUSE_POSIX_LOCKS) { + if (flags & FUSE_POSIX_LOCKS) { se->conn.capable |= FUSE_CAP_POSIX_LOCKS; } - if (arg->flags & FUSE_ATOMIC_O_TRUNC) { + if (flags & FUSE_ATOMIC_O_TRUNC) { se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; } - if (arg->flags & FUSE_EXPORT_SUPPORT) { + if (flags & FUSE_EXPORT_SUPPORT) { se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; } - if (arg->flags & FUSE_DONT_MASK) { + if (flags & FUSE_DONT_MASK) { se->conn.capable |= FUSE_CAP_DONT_MASK; } - if (arg->flags & FUSE_FLOCK_LOCKS) { + if (flags & FUSE_FLOCK_LOCKS) { se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; } - if (arg->flags & FUSE_AUTO_INVAL_DATA) { + if (flags & FUSE_AUTO_INVAL_DATA) { se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; } - if (arg->flags & FUSE_DO_READDIRPLUS) { + if (flags & FUSE_DO_READDIRPLUS) { se->conn.capable |= FUSE_CAP_READDIRPLUS; } - if (arg->flags & FUSE_READDIRPLUS_AUTO) { + if (flags & FUSE_READDIRPLUS_AUTO) { se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; } - if (arg->flags & FUSE_ASYNC_DIO) { + if (flags & FUSE_ASYNC_DIO) { se->conn.capable |= FUSE_CAP_ASYNC_DIO; } - if (arg->flags & FUSE_WRITEBACK_CACHE) { + if (flags & FUSE_WRITEBACK_CACHE) { se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; } - if (arg->flags & FUSE_NO_OPEN_SUPPORT) { + if (flags & FUSE_NO_OPEN_SUPPORT) { se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; } - if (arg->flags & FUSE_PARALLEL_DIROPS) { + if (flags & FUSE_PARALLEL_DIROPS) { se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; } - if (arg->flags & FUSE_POSIX_ACL) { + if (flags & FUSE_POSIX_ACL) { se->conn.capable |= FUSE_CAP_POSIX_ACL; } - if (arg->flags & FUSE_HANDLE_KILLPRIV) { + if (flags & FUSE_HANDLE_KILLPRIV) { se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; } - if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { + if (flags & FUSE_NO_OPENDIR_SUPPORT) { se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; } - if (!(arg->flags & FUSE_MAX_PAGES)) { + if (!(flags & FUSE_MAX_PAGES)) { size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + FUSE_BUFFER_HEADER_SIZE; if (bufsize > max_bufsize) { bufsize = max_bufsize; } } - if (arg->flags & FUSE_SUBMOUNTS) { + if (flags & FUSE_SUBMOUNTS) { se->conn.capable |= FUSE_CAP_SUBMOUNTS; } - if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) { + if (flags & FUSE_HANDLE_KILLPRIV_V2) { se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2; } - if (arg->flags & FUSE_SETXATTR_EXT) { + if (flags & FUSE_SETXATTR_EXT) { se->conn.capable |= FUSE_CAP_SETXATTR_EXT; } + if (flags & FUSE_SECURITY_CTX) { + se->conn.capable |= FUSE_CAP_SECURITY_CTX; + } #ifdef HAVE_SPLICE #ifdef HAVE_VMSPLICE se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; @@ -2051,7 +2174,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (se->conn.want & (~se->conn.capable)) { fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " - "0x%x that are not supported by kernel, aborting.\n", + "0x%llx that are not supported by kernel, aborting.\n", se->conn.want & (~se->conn.capable)); fuse_reply_err(req, EPROTO); se->error = -EPROTO; @@ -2062,7 +2185,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; } - if (arg->flags & FUSE_MAX_PAGES) { + if (flags & FUSE_MAX_PAGES) { outarg.flags |= FUSE_MAX_PAGES; outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; } @@ -2136,8 +2259,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, outarg.flags |= FUSE_SETXATTR_EXT; } + if (se->conn.want & FUSE_CAP_SECURITY_CTX) { + /* bits 32..63 get shifted down 32 bits into the flags2 field */ + outarg.flags2 |= FUSE_SECURITY_CTX >> 32; + } + fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); - fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); + fuse_log(FUSE_LOG_DEBUG, " flags2=0x%08x flags=0x%08x\n", outarg.flags2, + outarg.flags); fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); @@ -2280,6 +2409,7 @@ static struct { [FUSE_RENAME2] = { do_rename2, "RENAME2" }, [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" }, }; #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index c55c0ca2fc..b889dae4de 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -1226,6 +1226,19 @@ struct fuse_lowlevel_ops { */ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, struct fuse_file_info *fi); + + /** + * Synchronize file system content + * + * If this request is answered with an error code of ENOSYS, + * this is treated as success and future calls to syncfs() will + * succeed automatically without being sent to the filesystem + * process. + * + * @param req request handle + * @param ino the inode number + */ + void (*syncfs)(fuse_req_t req, fuse_ino_t ino); }; /** diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index a8295d975a..e226fc590f 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -187,6 +187,7 @@ void fuse_cmdline_help(void) " default: no_allow_direct_io\n" " -o announce_submounts Announce sub-mount points to the guest\n" " -o posix_acl/no_posix_acl Enable/Disable posix_acl. (default: disabled)\n" + " -o security_label/no_security_label Enable/Disable security label. (default: disabled)\n" ); } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b3d0674f6d..dfa2fc250d 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -173,10 +173,15 @@ struct lo_data { /* An O_PATH file descriptor to /proc/self/fd/ */ int proc_self_fd; + /* An O_PATH file descriptor to /proc/self/task/ */ + int proc_self_task; int user_killpriv_v2, killpriv_v2; /* If set, virtiofsd is responsible for setting umask during creation */ bool change_umask; int user_posix_acl, posix_acl; + /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */ + bool use_fscreate; + int user_security_label; }; static const struct fuse_opt lo_opts[] = { @@ -211,6 +216,8 @@ static const struct fuse_opt lo_opts[] = { { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 }, { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 }, { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 }, + { "security_label", offsetof(struct lo_data, user_security_label), 1 }, + { "no_security_label", offsetof(struct lo_data, user_security_label), 0 }, FUSE_OPT_END }; static bool use_syslog = false; @@ -230,6 +237,11 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, static int xattr_map_client(const struct lo_data *lo, const char *client_name, char **out_name); +#define FCHDIR_NOFAIL(fd) do { \ + int fchdir_res = fchdir(fd); \ + assert(fchdir_res == 0); \ + } while (0) + static bool is_dot_or_dotdot(const char *name) { return name[0] == '.' && @@ -257,6 +269,70 @@ static struct lo_data *lo_data(fuse_req_t req) } /* + * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With + * selinux=0, read from fscreate returns -EINVAL. + * + * TODO: Link with libselinux and use is_selinux_enabled() instead down + * the line. It probably will be more reliable indicator. + */ +static bool is_fscreate_usable(struct lo_data *lo) +{ + char procname[64]; + int fscreate_fd; + size_t bytes_read; + + sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); + fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR); + if (fscreate_fd == -1) { + return false; + } + + bytes_read = read(fscreate_fd, procname, 64); + close(fscreate_fd); + if (bytes_read == -1) { + return false; + } + return true; +} + +/* Helpers to set/reset fscreate */ +static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx, + size_t ctxlen, int *fd) +{ + char procname[64]; + int fscreate_fd, err = 0; + size_t written; + + sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); + fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY); + err = fscreate_fd == -1 ? errno : 0; + if (err) { + return err; + } + + written = write(fscreate_fd, ctx, ctxlen); + err = written == -1 ? errno : 0; + if (err) { + goto out; + } + + *fd = fscreate_fd; + return 0; +out: + close(fscreate_fd); + return err; +} + +static void close_reset_proc_fscreate(int fd) +{ + if ((write(fd, NULL, 0)) == -1) { + fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno); + } + close(fd); + return; +} + +/* * Load capng's state from our saved state if the current thread * hadn't previously been loaded. * returns 0 on success @@ -735,6 +811,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n"); conn->want &= ~FUSE_CAP_POSIX_ACL; } + + if (lo->user_security_label == 1) { + if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) { + fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label." + " kernel does not support FUSE_SECURITY_CTX capability.\n"); + } + conn->want |= FUSE_CAP_SECURITY_CTX; + } else { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n"); + conn->want &= ~FUSE_CAP_SECURITY_CTX; + } } static void lo_getattr(fuse_req_t req, fuse_ino_t ino, @@ -1284,16 +1371,103 @@ static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask, } } +static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir, + const char *name, const char *secctx_name) +{ + int path_fd, err; + char procname[64]; + struct lo_data *lo = lo_data(req); + + if (!req->secctx.ctxlen) { + return 0; + } + + /* Open newly created element with O_PATH */ + path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); + err = path_fd == -1 ? errno : 0; + if (err) { + return err; + } + sprintf(procname, "%i", path_fd); + FCHDIR_NOFAIL(lo->proc_self_fd); + /* Set security context. This is not atomic w.r.t file creation */ + err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen, + 0); + if (err) { + err = errno; + } + FCHDIR_NOFAIL(lo->root.fd); + close(path_fd); + return err; +} + +static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir, + const char *name, mode_t mode, dev_t rdev, + const char *link) +{ + int err, fscreate_fd = -1; + const char *secctx_name = req->secctx.name; + struct lo_cred old = {}; + struct lo_data *lo = lo_data(req); + char *mapped_name = NULL; + bool secctx_enabled = req->secctx.ctxlen; + bool do_fscreate = false; + + if (secctx_enabled && lo->xattrmap) { + err = xattr_map_client(lo, req->secctx.name, &mapped_name); + if (err < 0) { + return -err; + } + secctx_name = mapped_name; + } + + /* + * If security xattr has not been remapped and selinux is enabled on + * host, set fscreate and no need to do a setxattr() after file creation + */ + if (secctx_enabled && !mapped_name && lo->use_fscreate) { + do_fscreate = true; + err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, + &fscreate_fd); + if (err) { + goto out; + } + } + + err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); + if (err) { + goto out; + } + + err = mknod_wrapper(dir->fd, name, link, mode, rdev); + err = err == -1 ? errno : 0; + lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); + if (err) { + goto out; + } + + if (!do_fscreate) { + err = do_mknod_symlink_secctx(req, dir, name, secctx_name); + if (err) { + unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0); + } + } +out: + if (fscreate_fd != -1) { + close_reset_proc_fscreate(fscreate_fd); + } + g_free(mapped_name); + return err; +} + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, dev_t rdev, const char *link) { - int res; int saverr; struct lo_data *lo = lo_data(req); struct lo_inode *dir; struct fuse_entry_param e; - struct lo_cred old = {}; if (is_empty(name)) { fuse_reply_err(req, ENOENT); @@ -1311,21 +1485,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, return; } - saverr = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); + saverr = do_mknod_symlink(req, dir, name, mode, rdev, link); if (saverr) { goto out; } - res = mknod_wrapper(dir->fd, name, link, mode, rdev); - - saverr = errno; - - lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); - - if (res == -1) { - goto out; - } - saverr = lo_do_lookup(req, parent, name, &e, NULL); if (saverr) { goto out; @@ -2001,6 +2165,190 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, return 0; } +static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd, + bool tmpfile) +{ + int err, fd; + struct lo_cred old = {}; + struct lo_data *lo = lo_data(req); + int flags; + + if (tmpfile) { + flags = fi->flags | O_TMPFILE; + /* + * Don't use O_EXCL as we want to link file later. Also reset O_CREAT + * otherwise openat() returns -EINVAL. + */ + flags &= ~(O_CREAT | O_EXCL); + + /* O_TMPFILE needs either O_RDWR or O_WRONLY */ + if ((flags & O_ACCMODE) == O_RDONLY) { + flags |= O_RDWR; + } + } else { + flags = fi->flags | O_CREAT | O_EXCL; + } + + err = lo_change_cred(req, &old, lo->change_umask); + if (err) { + return err; + } + + /* Try to create a new file but don't open existing files */ + fd = openat(parent_inode->fd, name, flags, mode); + err = fd == -1 ? errno : 0; + lo_restore_cred(&old, lo->change_umask); + if (!err) { + *open_fd = fd; + } + return err; +} + +static int do_create_secctx_fscreate(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd) +{ + int err = 0, fd = -1, fscreate_fd = -1; + struct lo_data *lo = lo_data(req); + + err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, + &fscreate_fd); + if (err) { + return err; + } + + err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); + + close_reset_proc_fscreate(fscreate_fd); + if (!err) { + *open_fd = fd; + } + return err; +} + +static int do_create_secctx_tmpfile(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, + const char *secctx_name, int *open_fd) +{ + int err, fd = -1; + struct lo_data *lo = lo_data(req); + char procname[64]; + + err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true); + if (err) { + return err; + } + + err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); + if (err) { + err = errno; + goto out; + } + + /* Security context set on file. Link it in place */ + sprintf(procname, "%d", fd); + FCHDIR_NOFAIL(lo->proc_self_fd); + err = linkat(AT_FDCWD, procname, parent_inode->fd, name, + AT_SYMLINK_FOLLOW); + err = err == -1 ? errno : 0; + FCHDIR_NOFAIL(lo->root.fd); + +out: + if (!err) { + *open_fd = fd; + } else if (fd != -1) { + close(fd); + } + return err; +} + +static int do_create_secctx_noatomic(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, + const char *secctx_name, int *open_fd) +{ + int err = 0, fd = -1; + + err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); + if (err) { + goto out; + } + + /* Set security context. This is not atomic w.r.t file creation */ + err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); + err = err == -1 ? errno : 0; +out: + if (!err) { + *open_fd = fd; + } else { + if (fd != -1) { + close(fd); + unlinkat(parent_inode->fd, name, 0); + } + } + return err; +} + +static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd) +{ + struct lo_data *lo = lo_data(req); + char *mapped_name = NULL; + int err; + const char *ctxname = req->secctx.name; + bool secctx_enabled = req->secctx.ctxlen; + + if (secctx_enabled && lo->xattrmap) { + err = xattr_map_client(lo, req->secctx.name, &mapped_name); + if (err < 0) { + return -err; + } + + ctxname = mapped_name; + } + + if (secctx_enabled) { + /* + * If security.selinux has not been remapped and selinux is enabled, + * use fscreate to set context before file creation. If not, use + * tmpfile method for regular files. Otherwise fallback to + * non-atomic method of file creation and xattr settting. + */ + if (!mapped_name && lo->use_fscreate) { + err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi, + open_fd); + goto out; + } else if (S_ISREG(mode)) { + err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi, + ctxname, open_fd); + /* + * If filesystem does not support O_TMPFILE, fallback to non-atomic + * method. + */ + if (!err || err != EOPNOTSUPP) { + goto out; + } + } + + err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi, + ctxname, open_fd); + } else { + err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd, + false); + } + +out: + g_free(mapped_name); + return err; +} + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info *fi) { @@ -2010,7 +2358,6 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, struct lo_inode *inode = NULL; struct fuse_entry_param e; int err; - struct lo_cred old = {}; fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)" " kill_priv=%d\n", parent, name, fi->kill_priv); @@ -2026,18 +2373,9 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, return; } - err = lo_change_cred(req, &old, lo->change_umask); - if (err) { - goto out; - } - update_open_flags(lo->writeback, lo->allow_direct_io, fi); - /* Try to create a new file but don't open existing files */ - fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); - err = fd == -1 ? errno : 0; - - lo_restore_cred(&old, lo->change_umask); + err = do_lo_create(req, parent_inode, name, mode, fi, &fd); /* Ignore the error if file exists and O_EXCL was not given */ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { @@ -2467,6 +2805,15 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, int res; (void)ino; + if (!(op & LOCK_NB)) { + /* + * Blocking flock can deadlock as there is only one thread + * serving the queue. + */ + fuse_reply_err(req, EOPNOTSUPP); + return; + } + res = flock(lo_fi_fd(req, fi), op); fuse_reply_err(req, res == -1 ? errno : 0); @@ -2842,11 +3189,6 @@ static int xattr_map_server(const struct lo_data *lo, const char *server_name, return -ENODATA; } -#define FCHDIR_NOFAIL(fd) do { \ - int fchdir_res = fchdir(fd); \ - assert(fchdir_res == 0); \ - } while (0) - static bool block_xattr(struct lo_data *lo, const char *name) { /* @@ -3357,6 +3699,49 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, } } +static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode) +{ + int fd, ret = 0; + + fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n", + inode->fuse_ino); + + fd = lo_inode_open(lo, inode, O_RDONLY); + if (fd < 0) { + return -fd; + } + + if (syncfs(fd) < 0) { + ret = errno; + } + + close(fd); + return ret; +} + +static void lo_syncfs(fuse_req_t req, fuse_ino_t ino) +{ + struct lo_data *lo = lo_data(req); + struct lo_inode *inode = lo_inode(req, ino); + int err; + + if (!inode) { + fuse_reply_err(req, EBADF); + return; + } + + err = lo_do_syncfs(lo, inode); + lo_inode_put(lo, &inode); + + /* + * If submounts aren't announced, the client only sends a request to + * sync the root inode. TODO: Track submounts internally and iterate + * over them as well. + */ + + fuse_reply_err(req, err); +} + static void lo_destroy(void *userdata) { struct lo_data *lo = (struct lo_data *)userdata; @@ -3417,6 +3802,7 @@ static struct fuse_lowlevel_ops lo_oper = { .copy_file_range = lo_copy_file_range, #endif .lseek = lo_lseek, + .syncfs = lo_syncfs, .destroy = lo_destroy, }; @@ -3508,6 +3894,15 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) exit(1); } + /* Get the /proc/self/task descriptor */ + lo->proc_self_task = open("/proc/self/task/", O_PATH); + if (lo->proc_self_task == -1) { + fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n"); + exit(1); + } + + lo->use_fscreate = is_fscreate_usable(lo); + /* * We only need /proc/self/fd. Prevent ".." from accessing parent * directories of /proc/self/fd by bind-mounting it over /proc. Since / was @@ -3724,6 +4119,14 @@ static void setup_chroot(struct lo_data *lo) exit(1); } + lo->proc_self_task = open("/proc/self/task", O_PATH); + if (lo->proc_self_fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n"); + exit(1); + } + + lo->use_fscreate = is_fscreate_usable(lo); + /* * Make the shared directory the file system root so that FUSE_OPEN * (lo_open()) cannot escape the shared directory by opening a symlink. @@ -3909,6 +4312,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) close(lo->proc_self_fd); } + if (lo->proc_self_task >= 0) { + close(lo->proc_self_task); + } + if (lo->root.fd >= 0) { close(lo->root.fd); } @@ -3936,8 +4343,10 @@ int main(int argc, char *argv[]) .posix_lock = 0, .allow_direct_io = 0, .proc_self_fd = -1, + .proc_self_task = -1, .user_killpriv_v2 = -1, .user_posix_acl = -1, + .user_security_label = -1, }; struct lo_map_elem *root_elem; struct lo_map_elem *reserve_elem; diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 2bc0127b69..888295c073 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -111,6 +111,7 @@ static const int syscall_allowlist[] = { SCMP_SYS(set_robust_list), SCMP_SYS(setxattr), SCMP_SYS(symlinkat), + SCMP_SYS(syncfs), SCMP_SYS(time), /* Rarely needed, except on static builds */ SCMP_SYS(tgkill), SCMP_SYS(unlinkat), |