aboutsummaryrefslogtreecommitdiff
path: root/src/fs.h
blob: bc36636084c9be024560ede70b64cd721b10c050 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
// Copyright (c) 2017-2021 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_FS_H
#define BITCOIN_FS_H

#include <stdio.h>
#include <string>
#if defined WIN32 && defined __GLIBCXX__
#include <ext/stdio_filebuf.h>
#endif

#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <tinyformat.h>

/** Filesystem operations and types */
namespace fs {

using namespace boost::filesystem;

/**
 * Thin wrapper around boost::filesystem::path that prepares application code
 * for a later switch from boost::filesystem to std::filesystem.
 *
 * It adds the fs::path::u8string() member that boost does not have (the
 * matching free function is fs::u8path()), and it blocks the
 * fs::path(std::string) implicit constructor and the fs::path::string()
 * method. Both worked well with boost::filesystem, but have unsafe and
 * unpredictable behavior on Windows in the std::filesystem implementation
 * (see implementation note in \ref PathToString for details).
 */
class path : public boost::filesystem::path
{
public:
    // Pull in the boost::filesystem::path constructors.
    using boost::filesystem::path::path;

    // boost::filesystem::path arguments are accepted for compatibility.
    path(boost::filesystem::path path)
        : boost::filesystem::path::path(std::move(path))
    {
    }

    path& operator=(boost::filesystem::path path)
    {
        boost::filesystem::path::operator=(std::move(path));
        return *this;
    }

    path& operator/=(boost::filesystem::path path)
    {
        boost::filesystem::path::operator/=(std::move(path));
        return *this;
    }

    // String literal arguments are accepted; they are safe as long as the
    // literals are ASCII.
    path(const char* c)
        : boost::filesystem::path(c)
    {
    }

    path& operator=(const char* c)
    {
        boost::filesystem::path::operator=(c);
        return *this;
    }

    path& operator/=(const char* c)
    {
        boost::filesystem::path::operator/=(c);
        return *this;
    }

    path& append(const char* c)
    {
        boost::filesystem::path::append(c);
        return *this;
    }

    // std::string arguments are rejected at compile time to avoid
    // locale-dependent decoding on windows.
    path(std::string) = delete;
    path& operator=(std::string) = delete;
    path& operator/=(std::string) = delete;
    path& append(std::string) = delete;

    // Conversion to std::string through string() is rejected at compile time
    // to avoid locale-dependent encoding on windows.
    std::string string() const = delete;

    // UTF-8 string conversion method that std::filesystem has and
    // boost::filesystem lacks. Forwards to the base class string() method,
    // which the deleted declaration above hides for callers.
    std::string u8string() const
    {
        return boost::filesystem::path::string();
    }
};

// UTF-8 string conversion function that std::filesystem provides and
// boost::filesystem does not. The string is handed to the
// boost::filesystem::path constructor and the result wrapped as an fs::path.
static inline path u8path(const std::string& string)
{
    return path(boost::filesystem::path(string));
}

// Wrapper for boost::filesystem::system_complete that only takes an fs::path,
// so that std::string arguments are not implicitly converted (which would be
// a locale-dependent encoding on windows).
static inline path system_complete(const path& p)
{
    return path(boost::filesystem::system_complete(p));
}

// Wrapper for boost::filesystem::exists that only takes an fs::path, so that
// std::string arguments are not implicitly converted (which would be a
// locale-dependent encoding on windows).
static inline bool exists(const path& p)
{
    const bool present = boost::filesystem::exists(p);
    return present;
}

// Explicit quoted stream I/O for strings: forwards to boost::io::quoted with
// '&' as the escape character.
static inline auto quoted(const std::string& s)
{
    const char escape = '&';
    return boost::io::quoted(s, escape);
}

// Safe path append: returns p1 with p2 appended through the base class
// operator+= (no directory separator handling is requested here).
static inline path operator+(path p1, path p2)
{
    // View the right-hand operand as its boost base class so that the
    // boost::filesystem::path overload of operator+= is the one considered.
    boost::filesystem::path& rhs = p2;
    p1 += std::move(rhs);
    return p1;
}

// Disallow implicit std::string conversion for copy_file
// to avoid locale-dependent encoding on Windows.
static inline void copy_file(const path& from, const path& to, copy_option options)
{
    boost::filesystem::copy_file(from, to, options);
}

/**
 * Convert path object to byte string. On POSIX, paths natively are byte
 * strings, so this is trivial. On Windows, paths natively are Unicode, so an
 * encoding step is necessary. The inverse of \ref PathToString is \ref
 * PathFromString. The strings returned and parsed by these functions can be
 * used to call POSIX APIs, and for roundtrip conversion, logging, and
 * debugging.
 *
 * Because \ref PathToString and \ref PathFromString functions don't specify an
 * encoding, they are meant to be used internally, not externally. They are not
 * appropriate to use in applications requiring UTF-8, where
 * fs::path::u8string() and fs::u8path() methods should be used instead. Other
 * applications could require still different encodings. For example, JSON, XML,
 * or URI applications might prefer to use higher level escapes (\uXXXX or
 * &XXXX; or %XX) instead of multibyte encoding. Rust, Python, Java applications
 * may require encoding paths with their respective UTF-8 derivatives WTF-8,
 * PEP-383, and CESU-8 (see https://en.wikipedia.org/wiki/UTF-8#Derivatives).
 */
static inline std::string PathToString(const path& path)
{
    // Implementation note: the two platform families need opposite
    // strategies.
    //
    // On POSIX, u8string/u8path functions are not safe to use because paths
    // are not always valid UTF-8, so the plain string method, which does not
    // transform the path there, is used.
    //
    // On Windows, the std::filesystem::path(string) constructor and
    // std::filesystem::path::string() method are not safe to use here,
    // because these methods encode the path using C++'s narrow multibyte
    // encoding, which on Windows corresponds to the current "code page",
    // which is unpredictable and typically not able to represent all valid
    // paths. So std::filesystem::path::u8string() and
    // std::filesystem::u8path() functions are used instead on Windows.
#ifndef WIN32
    // The untransformed string is only a byte string if the native string
    // type is std::string; refuse to build otherwise.
    static_assert(std::is_same<path::string_type, std::string>::value, "PathToString not implemented on this platform");
    return path.boost::filesystem::path::string();
#else
    return path.u8string();
#endif
}

/**
 * Build a path object from a byte string. This is the inverse of
 * \ref PathToString: on Windows the string goes through u8path(), elsewhere
 * it is passed to the boost::filesystem::path constructor as-is.
 */
static inline path PathFromString(const std::string& string)
{
#ifndef WIN32
    return boost::filesystem::path(string);
#else
    return u8path(string);
#endif
}
} // namespace fs

/** Bridge operations to C stdio */
namespace fsbridge {
    /**
     * Open a C stdio stream for a path object.
     *
     * @param[in] p     Path of the file to open
     * @param[in] mode  Mode string (presumably with the same meaning as the
     *                  mode argument of the C library fopen; the definition
     *                  is not in this header, so confirm there)
     * @returns the FILE pointer for the opened stream. NOTE(review): the
     *          failure value is not visible from this header; presumably
     *          nullptr as with the C library fopen.
     */
    FILE *fopen(const fs::path& p, const char *mode);

    /**
     * Helper function for joining two paths
     *
     * @param[in] base  Base path
     * @param[in] path  Path to combine with base
     * @returns path unchanged if it is an absolute path, otherwise returns
     *          base joined with path. Returns base unchanged if path is
     *          empty.
     * @pre  Base path must be absolute
     * @post Returned path will always be absolute
     */
    fs::path AbsPathJoin(const fs::path& base, const fs::path& path);

    /**
     * Lock object bound to one file path.
     *
     * An instance can only be created from a path; default construction,
     * copying and moving are all disabled. The constructor, destructor and
     * TryLock() are defined outside this header.
     */
    class FileLock
    {
    public:
        FileLock() = delete;
        FileLock(const FileLock&) = delete;
        FileLock(FileLock&&) = delete;
        explicit FileLock(const fs::path& file);
        ~FileLock();

        /**
         * Attempt to take the lock.
         * @returns a bool result; presumably true when the lock was acquired
         *          (the definition is not in this header - confirm there).
         */
        bool TryLock();

        /**
         * @returns a copy of the stored reason string. Presumably this
         *          describes why opening or locking the file failed; it is
         *          filled in by code outside this header.
         *
         * Const-qualified because it only reads the member, so it can be
         * called through a const FileLock reference.
         */
        std::string GetReason() const { return reason; }

    private:
        std::string reason;
#ifndef WIN32
        // File descriptor; -1 is the initial value before any file is opened.
        int fd = -1;
#else
        void* hFile = (void*)-1; // INVALID_HANDLE_VALUE
#endif
    };

    /**
     * Produce a message string for a filesystem_error exception.
     *
     * @param[in] e  The filesystem error to describe
     * @returns the message as a std::string. NOTE(review): the encoding and
     *          exact content of the message are decided by the definition,
     *          which is not in this header.
     */
    std::string get_filesystem_error_message(const fs::filesystem_error& e);

    // GNU libstdc++ specific workaround for opening UTF-8 paths on Windows.
    //
    // On Windows, it is only possible to reliably access multibyte file paths through
    // `wchar_t` APIs, not `char` APIs. But because the C++ standard doesn't
    // require ifstream/ofstream `wchar_t` constructors, and the GNU library doesn't
    // provide them (in contrast to the Microsoft C++ library, see
    // https://stackoverflow.com/questions/821873/how-to-open-an-stdfstream-ofstream-or-ifstream-with-a-unicode-filename/822032#822032),
    // Boost is forced to fall back to `char` constructors which may not work properly.
    //
    // Work around this issue by creating stream objects with `_wfopen` in
    // combination with `__gnu_cxx::stdio_filebuf`. This workaround can be removed
    // with an upgrade to C++17, where streams can be constructed directly from
    // `std::filesystem::path` objects.

#if defined WIN32 && defined __GLIBCXX__
    /**
     * Input stream used in place of fs::ifstream when building for Windows
     * with GNU libstdc++. It owns a __gnu_cxx::stdio_filebuf and the FILE
     * pointer behind it; open() and close() are defined outside this header.
     */
    class ifstream : public std::istream
    {
    public:
        ifstream() = default;
        /** Construct and immediately open p with the given mode. */
        explicit ifstream(const fs::path& p, std::ios_base::openmode mode = std::ios_base::in) { open(p, mode); }
        /** Closes the stream on destruction. */
        ~ifstream() { close(); }
        void open(const fs::path& p, std::ios_base::openmode mode = std::ios_base::in);
        /**
         * @returns whether the underlying file buffer is open.
         *
         * Const-qualified to match std::basic_ifstream::is_open(), which is
         * what callers get on platforms where fsbridge::ifstream is an alias
         * for fs::ifstream; this keeps code that queries a const stream
         * portable.
         */
        bool is_open() const { return m_filebuf.is_open(); }
        void close();

    private:
        __gnu_cxx::stdio_filebuf<char> m_filebuf;
        FILE* m_file = nullptr;
    };
    /**
     * Output stream used in place of fs::ofstream when building for Windows
     * with GNU libstdc++. It owns a __gnu_cxx::stdio_filebuf and the FILE
     * pointer behind it; open() and close() are defined outside this header.
     */
    class ofstream : public std::ostream
    {
    public:
        ofstream() = default;
        /** Construct and immediately open p with the given mode. */
        explicit ofstream(const fs::path& p, std::ios_base::openmode mode = std::ios_base::out) { open(p, mode); }
        /** Closes the stream on destruction. */
        ~ofstream() { close(); }
        void open(const fs::path& p, std::ios_base::openmode mode = std::ios_base::out);
        /**
         * @returns whether the underlying file buffer is open.
         *
         * Const-qualified to match std::basic_ofstream::is_open(), which is
         * what callers get on platforms where fsbridge::ofstream is an alias
         * for fs::ofstream; this keeps code that queries a const stream
         * portable.
         */
        bool is_open() const { return m_filebuf.is_open(); }
        void close();

    private:
        __gnu_cxx::stdio_filebuf<char> m_filebuf;
        FILE* m_file = nullptr;
    };
#else  // !(WIN32 && __GLIBCXX__)
    // No workaround needed on this configuration: use the stream types that
    // namespace fs provides.
    using ifstream = fs::ifstream;
    using ofstream = fs::ofstream;
#endif // WIN32 && __GLIBCXX__
};

// Disallow path operator<< formatting in tinyformat to avoid locale-dependent
// encoding on windows.
//
// The two explicit specializations below are deleted, so passing a path
// object as a tinyformat argument is a compile-time error. Both the boost
// base class and the fs::path wrapper are covered.
namespace tinyformat {
// Deleted for the boost base class.
template<> inline void formatValue(std::ostream&, const char*, const char*, int, const boost::filesystem::path&) = delete;
// Deleted for the fs::path wrapper.
template<> inline void formatValue(std::ostream&, const char*, const char*, int, const fs::path&) = delete;
} // namespace tinyformat

#endif // BITCOIN_FS_H