Skip to content

Commit

Permalink
Hadoop: update ranger (#5430)
Browse files Browse the repository at this point in the history
  • Loading branch information
tangyoupeng authored Dec 27, 2024
1 parent e1d0893 commit 448da08
Show file tree
Hide file tree
Showing 15 changed files with 631 additions and 278 deletions.
1 change: 0 additions & 1 deletion docs/en/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,6 @@ JuiceFS currently supports path permission control by integrating with Apache Ra
|-----------------------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `juicefs.ranger-rest-url` | | HTTP URL used to connect to `ranger`. If this option is not configured, the feature is disabled. |
| `juicefs.ranger-service-name` | | The `service name` configured in `ranger`'s `HDFS` module. Required. |
| `juicefs.ranger-cache-dir` | | Cache path for `ranger` policies. By default, a `UUID` subdirectory is added under the directory given by the Java system property `java.io.tmpdir`, so that concurrent tasks do not interfere with each other. When a fixed directory is configured, multiple tasks share the cache and only one JuiceFS instance is responsible for refreshing it, which reduces the load on `Ranger Admin`. |
| `juicefs.ranger-poll-interval-ms` | `30000` | Interval, in milliseconds, at which the `ranger` cache is refreshed. The default is 30 seconds. |

### 2. Dependencies
Expand Down
1 change: 0 additions & 1 deletion docs/zh_cn/deployment/hadoop_java_sdk.md
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,6 @@ JuiceFS 当前支持对接 Apache Ranger 的 `HDFS` 模块进行路径的权限
|-----------------------------------|----------|--------------------------------------------------------------------------------------------------------------------------------|
| `juicefs.ranger-rest-url` | | `ranger`连接地址。不配置该参数即不使用该功能。 |
| `juicefs.ranger-service-name` | | `ranger`中配置的`service name`,必填 |
| `juicefs.ranger-cache-dir` | | `ranger`策略的缓存路径。默认在环境变量`java.io.tmpdir`下,添加`UUID`路径层级防止多任务相互影响。当配置固定目录后,多个任务会共享缓存,有且仅有一个JuiceFS对象负责缓存刷新,减少对连接`Ranger Admin`压力。 |
| `juicefs.ranger-poll-interval-ms` | `30000` | `ranger`缓存刷新周期,默认30s |

### 2. 环境及依赖
Expand Down
14 changes: 14 additions & 0 deletions sdk/java/libjfs/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,20 @@ func jfs_update_uid_grouping(cname, uidstr *C.char, grouping *C.char) {
}
}

// jfs_getGroups returns the groups cached for user in the volume called name,
// joined with commas. It returns "" when the volume has no cached mapping or
// when the user has no entry in that mapping. The lookup runs while holding
// fslock.
//
//export jfs_getGroups
func jfs_getGroups(name, user string) string {
	fslock.Lock()
	defer fslock.Unlock()
	// Reading from a nil map yields the zero value, so a missing volume and a
	// missing user both fall through to the empty result below.
	if cached := userGroupCache[name][user]; cached != nil {
		return strings.Join(cached, ",")
	}
	return ""
}

//export jfs_term
func jfs_term(pid int, h int64) int {
w := F(h)
Expand Down
33 changes: 28 additions & 5 deletions sdk/java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@
<pattern>com.google.common</pattern>
<shadedPattern>io.juicefs.shaded.com.google.common</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.commons.lang</pattern>
<shadedPattern>io.juicefs.shaded.org.apache.commons.lang</shadedPattern>
</relocation>
<relocation>
<pattern>com.kstruct.gethostname4j</pattern>
<shadedPattern>io.juicefs.shaded.com.kstruct.gethostname4j</shadedPattern>
</relocation>
</relocations>
</configuration>
</plugin>
Expand Down Expand Up @@ -350,6 +358,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>org.apache.ranger</groupId>
<artifactId>ranger-plugins-common</artifactId>
Expand All @@ -361,6 +374,21 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.kstruct</groupId>
<artifactId>gethostname4j</artifactId>
<version>0.0.2</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-bundle</artifactId>
<version>1.19.3</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-jaxrs</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.apache.ranger</groupId>
<artifactId>ranger-plugins-audit</artifactId>
Expand All @@ -372,11 +400,6 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
</dependencies>

<distributionManagement>
Expand Down
97 changes: 49 additions & 48 deletions sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,16 @@ static String loadVersion() {
private Path workingDir;
private String name;
private String user;
private String group;
private Set<String> groups;
private String superuser;
private String supergroup;
private URI uri;
private long blocksize;
private int minBufferSize;
private int cacheReplica;
private boolean fileChecksumEnabled;
private static boolean permissionCheckEnabled = false;
private final boolean isSuperGroupFileSystem;
private boolean isBackGroundTask = false;

private JuiceFileSystemImpl superGroupFileSystem;
private RangerPermissionChecker rangerPermissionChecker;
private static Libjfs lib = loadLibrary();
Expand Down Expand Up @@ -207,6 +206,8 @@ public static interface Libjfs {

int jfs_setfacl(long pid, long h, String path, int acltype, Pointer b, int len);

String jfs_getGroups(String volName, String user);

void jfs_set_callback(LogCallBack callBack);

interface LogCallBack {
Expand Down Expand Up @@ -369,22 +370,18 @@ public void initialize(URI uri, Configuration conf) throws IOException {
minBufferSize = conf.getInt("juicefs.min-buffer-size", 128 << 10);
cacheReplica = Integer.parseInt(getConf(conf, "cache-replica", "1"));
fileChecksumEnabled = Boolean.parseBoolean(getConf(conf, "file.checksum", "false"));
permissionCheckEnabled = getConf(conf, "ranger-rest-url", null) != null;

this.ugi = UserGroupInformation.getCurrentUser();
user = ugi.getShortUserName();
group = "nogroup";
String groupingFile = getConf(conf, "groups", null);
if (isEmpty(groupingFile) && ugi.getGroupNames().length > 0) {
group = String.join(",", ugi.getGroupNames());
String groupStr = "nogroup";
if (ugi.getGroupNames().length > 0) {
groupStr = String.join(",", ugi.getGroupNames());
}
groups = Arrays.stream(group.split(",")).collect(Collectors.toSet());
superuser = getConf(conf, "superuser", "hdfs");
supergroup = getConf(conf, "supergroup", conf.get("dfs.permissions.superusergroup", "supergroup"));
if (permissionCheckEnabled && isSuperGroupFileSystem) {
group = supergroup;
groups.clear();
groups.add(supergroup);
isBackGroundTask = conf.getBoolean("juicefs.internal-bg-task", false);
if (isSuperGroupFileSystem || isBackGroundTask) {
groupStr = supergroup;
}

synchronized (JuiceFileSystemImpl.class) {
Expand Down Expand Up @@ -445,12 +442,11 @@ public void initialize(URI uri, Configuration conf) throws IOException {
obj.put("freeSpace", getConf(conf, "free-space", "0.1"));
obj.put("accessLog", getConf(conf, "access-log", ""));
String jsonConf = obj.toString(2);
handle = lib.jfs_init(name, jsonConf, user, group, superuser, supergroup);
handle = lib.jfs_init(name, jsonConf, user, groupStr, superuser, supergroup);
if (handle <= 0) {
throw new IOException("JuiceFS initialized failed for jfs://" + name);
}
boolean asBgTask = conf.getBoolean("juicefs.internal-bg-task", false);
if (asBgTask) {
if (isBackGroundTask) {
LOG.debug("background fs {}|({})", name, handle);
} else {
BgTaskUtil.register(name, handle);
Expand Down Expand Up @@ -500,36 +496,30 @@ public void initialize(URI uri, Configuration conf) throws IOException {
JuiceFSInstrumentation.init(this, statistics);
}

if (permissionCheckEnabled) {
try {
if (!isSuperGroupFileSystem) {
RangerConfig rangerConfig = checkAndGetRangerParams(conf);
Configuration superConf = new Configuration(conf);
superGroupFileSystem = new JuiceFileSystemImpl(true);
superGroupFileSystem.initialize(uri, superConf);
rangerPermissionChecker = new RangerPermissionChecker(superGroupFileSystem, rangerConfig, user, group);
}
} catch (Exception e) {
if (rangerPermissionChecker != null) {
rangerPermissionChecker.cleanUp();
}
throw new RuntimeException("The initialization of the Permission Checker has failed. ", e);
}

String rangerRestUrl = getConf(conf, "ranger-rest-url", null);
if (!isEmpty(rangerRestUrl) && !isSuperGroupFileSystem && !isBackGroundTask) {
RangerConfig rangerConfig = checkAndGetRangerParams(rangerRestUrl, conf);
Configuration superConf = new Configuration(conf);
superConf.set("juicefs.internal-bg-task", "true");
superGroupFileSystem = new JuiceFileSystemImpl(true);
superGroupFileSystem.initialize(uri, superConf);
rangerPermissionChecker = RangerPermissionChecker.acquire(name, handle, superGroupFileSystem, rangerConfig);
}

if (!asBgTask && !isSuperGroupFileSystem) {
if (!isBackGroundTask && !isSuperGroupFileSystem) {
// use juicefs.users and juicefs.groups for global mapping
String uidFile = getConf(conf, "users", null);
if (!isEmpty(uidFile) || !isEmpty(groupingFile)) {
String groupFile = getConf(conf, "groups", null);
if (!isEmpty(uidFile) || !isEmpty(groupFile)) {
BgTaskUtil.putTask(name, "Refresh guid", () -> {
updateUidAndGrouping(uidFile, groupingFile);
updateUidAndGrouping(uidFile, groupFile);
}, 1, 1, TimeUnit.MINUTES);
}
}
}

private RangerConfig checkAndGetRangerParams(Configuration conf) throws RuntimeException, IOException {
String rangerRestUrl = getConf(conf, "ranger-rest-url", "");
private RangerConfig checkAndGetRangerParams(String rangerRestUrl, Configuration conf) throws IOException {
if (!rangerRestUrl.startsWith("http")) {
throw new IOException("illegal value for parameter 'juicefs.ranger-rest-url': " + rangerRestUrl);
}
Expand All @@ -539,38 +529,52 @@ private RangerConfig checkAndGetRangerParams(Configuration conf) throws RuntimeE
throw new IOException("illegal value for parameter 'juicefs.ranger-service-name': " + serviceName);
}

String cacheDir = getConf(conf, "ranger-cache-dir", System.getProperty("java.io.tmpdir") + "/" + UUID.randomUUID());
String pollIntervalMs = getConf(conf, "ranger-poll-interval-ms", "30000");

return new RangerConfig(rangerRestUrl, serviceName, cacheDir, pollIntervalMs);
return new RangerConfig(rangerRestUrl, serviceName, Long.parseLong(pollIntervalMs));
}

/**
 * Creates an instance and records whether it is the internal "super group"
 * file system. The instance is not usable until {@code initialize} is called.
 *
 * @param isSuperGroupFileSystem value stored in the final
 *        {@code isSuperGroupFileSystem} field
 */
private JuiceFileSystemImpl(boolean isSuperGroupFileSystem) {
this.isSuperGroupFileSystem = isSuperGroupFileSystem;
}

/**
 * Resolves the groups of the current user.
 *
 * <p>When the {@code groups} option is configured, the comma-separated group
 * list reported by the native library for this volume and user is used if it
 * is non-empty. In every other case the groups come from the Hadoop
 * {@code UserGroupInformation} of the current user.
 *
 * @return a new mutable set holding the user's group names
 */
private Set<String> getGroups() {
  String groupsFile = getConf(getConf(), "groups", null);
  if (!isEmpty(groupsFile)) {
    // A groups file is configured: prefer the mapping kept by the native side.
    String joined = lib.jfs_getGroups(name, user);
    if (!isEmpty(joined)) {
      return new HashSet<>(Arrays.asList(joined.split(",")));
    }
  }
  // No groups file, or no mapping for this user: fall back to the UGI groups.
  return new HashSet<>(ugi.getGroups());
}

/**
 * Tells whether the current user has super permission: either the user name
 * equals {@code superuser}, or the user's groups contain {@code supergroup}.
 *
 * @return {@code true} if the user is the superuser or a supergroup member
 */
private boolean hasSuperPermission() {
return user.equals(superuser) || groups.contains(supergroup);
return user.equals(superuser) || getGroups().contains(supergroup);
}

/**
 * Decides whether an operation must go through the permission checker.
 * The answer is {@code false} whenever the current user has super permission
 * (see {@code hasSuperPermission()}); otherwise it depends on whether
 * permission checking is active for this instance.
 *
 * @return {@code true} if the permission check must be performed
 */
private boolean needCheckPermission() {
return permissionCheckEnabled && !hasSuperPermission();
return rangerPermissionChecker != null && !isSuperGroupFileSystem && !isBackGroundTask && !hasSuperPermission() ;
}

/**
 * Delegates to {@code rangerPermissionChecker.checkPermission}, passing
 * {@code action} as the fifth argument and {@code null} for the third and
 * fourth (the other two action slots), with the boolean flag set to
 * {@code false}.
 * NOTE(review): the fifth slot is presumably the access required on the path
 * itself; confirm against RangerPermissionChecker.
 *
 * @param path      path the operation targets
 * @param action    action to be checked
 * @param operation operation name forwarded to the checker
 * @return the result of {@code checkPermission}
 * @throws IOException declared for failures raised by the checker
 */
private boolean checkPathAccess(Path path, FsAction action, String operation) throws IOException {
return rangerPermissionChecker.checkPermission(path, false, null, null, action, operation);
return rangerPermissionChecker.checkPermission(path, false, null, null, action, operation, user, getGroups());
}

/**
 * Delegates to {@code rangerPermissionChecker.checkPermission}, passing
 * {@code action} as the fourth argument and {@code null} for the third and
 * fifth (the other two action slots), with the boolean flag set to
 * {@code false}.
 * NOTE(review): the fourth slot is presumably the access required on the
 * parent of the path; confirm against RangerPermissionChecker.
 *
 * @param path      path the operation targets
 * @param action    action to be checked
 * @param operation operation name forwarded to the checker
 * @return the result of {@code checkPermission}
 * @throws IOException declared for failures raised by the checker
 */
private boolean checkParentPathAccess(Path path, FsAction action, String operation) throws IOException {
return rangerPermissionChecker.checkPermission(path, false, null, action, null, operation);
return rangerPermissionChecker.checkPermission(path, false, null, action, null, operation, user, getGroups());
}

/**
 * Delegates to {@code rangerPermissionChecker.checkPermission}, passing
 * {@code action} as the third argument and {@code null} for the fourth and
 * fifth (the other two action slots), with the boolean flag set to
 * {@code false}.
 * NOTE(review): the third slot is presumably the access required on an
 * ancestor of the path; confirm against RangerPermissionChecker.
 *
 * @param path      path the operation targets
 * @param action    action to be checked
 * @param operation operation name forwarded to the checker
 * @return the result of {@code checkPermission}
 * @throws IOException declared for failures raised by the checker
 */
private boolean checkAncestorAccess(Path path, FsAction action, String operation) throws IOException {
return rangerPermissionChecker.checkPermission(path, false, action, null, null, operation);
return rangerPermissionChecker.checkPermission(path, false, action, null, null, operation, user, getGroups());
}

/**
 * Delegates to {@code rangerPermissionChecker.checkPermission} with the
 * boolean flag set to {@code true} and all three action slots {@code null}.
 * NOTE(review): the flag presumably requests an ownership check; confirm
 * against RangerPermissionChecker.
 *
 * @param path      path the operation targets
 * @param operation operation name forwarded to the checker
 * @return the result of {@code checkPermission}
 * @throws IOException declared for failures raised by the checker
 */
private boolean checkOwner(Path path, String operation) throws IOException {
return rangerPermissionChecker.checkPermission(path, true, null, null, null, operation);
return rangerPermissionChecker.checkPermission(path, true, null, null, null, operation, user, getGroups());
}

private boolean isEmpty(String str) {
Expand Down Expand Up @@ -623,7 +627,6 @@ private void updateUidAndGrouping(String uidFile, String groupFile) throws IOExc
}

lib.jfs_update_uid_grouping(name, uidstr, grouping);
groups = Arrays.stream(group.split(",")).collect(Collectors.toSet());
}

private void initializeStorageIds(Configuration conf) throws IOException {
Expand Down Expand Up @@ -1858,6 +1861,7 @@ public void setTimes(Path p, long mtime, long atime) throws IOException {
@Override
public void close() throws IOException {
super.close();
RangerPermissionChecker.release(name, handle);
BgTaskUtil.unregister(name, handle, () -> {
cachedHostsForName.clear();
hashForName.clear();
Expand All @@ -1868,9 +1872,6 @@ public void close() throws IOException {
if (metricsEnable) {
JuiceFSInstrumentation.close();
}
if (rangerPermissionChecker != null) {
rangerPermissionChecker.cleanUp();
}
}

@Override
Expand Down
50 changes: 0 additions & 50 deletions sdk/java/src/main/java/io/juicefs/permission/LockFileChecker.java

This file was deleted.

Loading

0 comments on commit 448da08

Please sign in to comment.