From 19a570c9df89ad0450cb4320943f2616dffa2f2a Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 21 Jan 2022 08:33:05 -0500 Subject: [PATCH 1/3] revert me on resolution of irods/irods#6100. --- libirods_rule_engine_plugin-indexing.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libirods_rule_engine_plugin-indexing.cpp b/libirods_rule_engine_plugin-indexing.cpp index 3b979e4..7c8aa4e 100644 --- a/libirods_rule_engine_plugin-indexing.cpp +++ b/libirods_rule_engine_plugin-indexing.cpp @@ -217,9 +217,10 @@ namespace { &obj_inp->condInput, DEST_RESC_HIER_STR_KW); if(!resc_hier) { - THROW(SYS_INVALID_INPUT_PARAM, "resc hier is null"); + const auto Message = boost::str( + boost::format("Will not initiate full-text indexing of new REPL (object path '%s') because resc hier is null [irods/irods#6100].") % object_path); + THROW(SYS_INVALID_INPUT_PARAM, Message.c_str()); } - irods::hierarchy_parser parser; parser.set_string(resc_hier); parser.last_resc(source_resource); From 932848d5fb4798d208836e314b8b9a9f3c3ae6d6 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 30 Mar 2022 00:44:00 +0000 Subject: [PATCH 2/3] [_81] implement user and group permissions for metadata schema --- es_mapping.json | 14 ++ indexing.cmake | 1 + indexing_utilities.cpp | 221 +++++++++++++------ libirods_rule_engine_plugin-indexing.cpp | 257 ++++++++++++++++++++++- packaging/atomic_acl_ops.py | 56 +++++ packaging/atomic_metadata_ops.py | 2 +- packaging/test_plugin_indexing.py | 153 +++++++++++++- path_calc.hpp | 95 +++++++++ 8 files changed, 730 insertions(+), 69 deletions(-) create mode 100755 packaging/atomic_acl_ops.py create mode 100644 path_calc.hpp diff --git a/es_mapping.json b/es_mapping.json index b13ffeb..6f30e39 100644 --- a/es_mapping.json +++ b/es_mapping.json @@ -27,6 +27,20 @@ "type": "date", "format": "epoch_second" }, + "creator": { + "type": "keyword" + }, + "userPermissions": { + "type": "nested", + "properties": { + "permission": { + "type": "keyword" + 
}, + "user": { + "type": "keyword" + } + } + }, "metadataEntries": { "type": "nested", "properties": { diff --git a/indexing.cmake b/indexing.cmake index 3a35c1c..ec9c6b1 100644 --- a/indexing.cmake +++ b/indexing.cmake @@ -66,6 +66,7 @@ install( FILES ${CMAKE_SOURCE_DIR}/packaging/test_plugin_indexing.py ${CMAKE_SOURCE_DIR}/packaging/atomic_metadata_ops.py + ${CMAKE_SOURCE_DIR}/packaging/atomic_acl_ops.py DESTINATION ${IRODS_HOME_DIRECTORY}/scripts/irods/test PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ COMPONENT ${IRODS_PACKAGE_COMPONENT_POLICY_NAME} diff --git a/indexing_utilities.cpp b/indexing_utilities.cpp index 027141d..3e7db7a 100644 --- a/indexing_utilities.cpp +++ b/indexing_utilities.cpp @@ -36,7 +36,11 @@ #include #include "json.hpp" #include "cpp_json_kw.hpp" +#include "path_calc.hpp" +#include +#include +#include using namespace std::string_literals; @@ -242,6 +246,25 @@ namespace irods { generate_delay_execution_parameters()); } + struct index_info + { + public: + index_info ( const std::string & index_name_ + ,const std::string & index_type_ + ,const std::string & index_tech_ ) + : index_name {index_name_} + ,index_type {index_type_} + ,index_tech {index_tech_} + { + } + bool operator< (const index_info & other) const { return std::tie(index_name, index_type, index_tech) + < std::tie(other.index_name, other.index_type, + other.index_tech); } /* lexicographic compare: the strict weak ordering std::set requires */ + std::string index_name; + std::string index_type; + std::string index_tech; + }; + // - Starting at _collection_name , recurse over every sub-element of the tree // - (including data objects and collections and starting with the root). 
// - Call schedule_policy_event_for_object for every object or collection @@ -258,6 +281,45 @@ namespace irods { using fsp = fs::path; rsComm_t& comm = *rei_->rsComm; + auto calculate_indexing_avus = [&] (const std::string & collname) + { + try { + std::set s; + irods::query q {&comm, + fmt::format("select META_COLL_ATTR_NAME,META_COLL_ATTR_VALUE,META_COLL_ATTR_UNITS where" + " COLL_NAME = '{}' and META_COLL_ATTR_NAME = '{}' " + " and META_COLL_ATTR_VALUE like '%::metadata'", collname, config_.index)}; + for (const auto &row : q) { + std::string idx_name, idx_type, idx_tech; + std::tie(idx_name, idx_type) = irods::indexing::parse_indexer_string(row[1]); + idx_tech = row[2]; + s.insert({idx_name, idx_type, idx_tech}); + } + return s; + } + catch (const std::exception& e) { + THROW( SYS_LIBRARY_ERROR , fmt::format("Cannot recover from library error - {}",e.what())); + } + catch (...) { + THROW( SYS_UNKNOWN_ERROR , "Reached unrecoverable state"); + } + }; + + path_calc_and_cache idx_info_cache{ calculate_indexing_avus }; + + bool search_parent_tags = (_indexer.empty() && _index_name.empty()); + + // The mode defined by (search_parent_tags == true) causes the set of applicable indexing AVU tags + // to be recomputed at every point of iteration in the collection tree (and then cached for the benefit + // of objects within any sub-paths). This is useful for the case of ichmod -r (metadata index-type only), + // and that is currently the only application of this mode. 
+ + if (search_parent_tags && (_index_type != "metadata")) { + irods::log(LOG_ERROR, fmt::format("In function {} line {} - inappropriate use of search_parent_tags mode outside of 'metadata' index-type", + __func__, __LINE__)); + return; + } + const auto indexing_resources = get_indexing_resource_names(); const auto policy_name = operation_and_index_types_to_policy_name( _operation_type, @@ -308,80 +370,107 @@ namespace irods { struct query_failed : public std::runtime_error { query_failed( const std::string& e = "Query failed to fetch # of jobs active" ) - : std::runtime_error{e} {} + : std::runtime_error{e} {} }; struct job_limit_precision : public std::runtime_error { job_limit_precision( const std::string& e = "Job Limits may not exceed 32-bit unsigned integer precision" ) - : std::runtime_error{e} {} + : std::runtime_error{e} {} }; + try { + for (auto path = start_path; ; ++iter) { + const auto s = fsvr::status(comm,path); + bool is_collection = fsvr::is_collection(s); + bool is_data_object = fsvr::is_data_object(s); + if (is_data_object || is_collection) { + try { + std::string resc_name; + if (is_data_object) { + resc_name = get_indexing_resource_name_for_object( + path.string(), + indexing_resources); + } - for (auto path = start_path; ; ++iter) { - const auto s = fsvr::status(comm,path); - bool is_collection = fsvr::is_collection(s); - bool is_data_object = fsvr::is_data_object(s); - if (is_data_object || is_collection) { - try { - std::string resc_name; - if (is_data_object) { - resc_name = get_indexing_resource_name_for_object( - path.string(), - indexing_resources); - } - - if (job_limit > 0 && n_jobs >= job_limit) { - // The job limit parameter should be a large number, in the thousands or more perhaps, but small - // enough so that indexing your largest collections doesn't fill up all of virtual memory. 
- for(;;) { - query qobj{comm_, JOB_QUERY_STRING, 1}; - for (const auto & row: qobj) { - auto count = std::stol( row[0] ); - if (count > job_max) { throw job_limit_precision{}; } - n_jobs = count; - break; - } - // The approach to throttling is simply to wait until the number of delayed tasks falls - // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again. - // Because we're already in a delayed task, this does not impact the plugin's response time. - if (n_jobs > LOW_WATER_MARK) { - sleep(1); + // if job limit is exceeded, wait before spawning more jobs + + if (job_limit > 0 && n_jobs >= job_limit) { + // The job limit parameter should be a large number, in the thousands or more perhaps, but small + // enough so that indexing your largest collections doesn't fill up all of virtual memory. + for(;;) { + query qobj{comm_, JOB_QUERY_STRING, 1}; + for (const auto & row: qobj) { + auto count = std::stol( row[0] ); + if (count > job_max) { throw job_limit_precision{}; } + n_jobs = count; + break; + } + // The approach to throttling is simply to wait until the number of delayed tasks falls + // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again. + // Because we're already in a delayed task, this does not impact the plugin's response time. + if (n_jobs > LOW_WATER_MARK) { + sleep(1); + } + else { + break; + } } - else { - break; + } + + if ( search_parent_tags ) { + // - Note: this mode is currently used only in support of ichmod, ie. with the metadata index-type. 
+ auto info_set = idx_info_cache.accum( path.string() ); + for (auto info : info_set) { + schedule_policy_event_for_object( + policy_name, + path.string(), + _user_name, + EMPTY_RESOURCE_NAME, + info.index_tech, + info.index_name, + info.index_type, + generate_delay_execution_parameters(), + {},{},{}, + {{ "job_category_tag", unique_key }} ); + ++n_jobs; } } + else if (!(is_collection && _index_type == "full_text")) { // full_text is meaningless for collection objects + schedule_policy_event_for_object( + policy_name, + path.string(), + _user_name, + EMPTY_RESOURCE_NAME, + _indexer, + _index_name, + _index_type, + generate_delay_execution_parameters(), + {},{},{}, + {{ "job_category_tag", unique_key }} ); + + ++n_jobs; + } } - - if ( ! (is_collection && _index_type == "full_text" )) { - schedule_policy_event_for_object( - policy_name, - path.string(), - _user_name, - EMPTY_RESOURCE_NAME, - _indexer, - _index_name, - _index_type, - generate_delay_execution_parameters(), - {},{},{}, - {{ "job_category_tag", unique_key }} ); - - ++n_jobs; + catch(const exception& _e) { + rodsLog( + LOG_ERROR, + "failed to find indexing resource (error code=[%ld]) for object [%s]", static_cast(_e.code()), + path.c_str()); } - } - catch(const exception& _e) { - rodsLog( - LOG_ERROR, - "failed to find indexing resource (error code=[%ld]) for object [%s]",static_cast(_e.code()), - path.string().c_str()); - } - catch (const std::runtime_error & e) { - irods::log(LOG_ERROR,fmt::format("Abort indexing collection: {}",e.what())); - break; - } - if (iter != iter_end) { path = iter->path(); } - else { break; } - } // if collection or data object - } // for path + catch (const std::runtime_error & e) { + irods::log(LOG_ERROR, fmt::format("Abort indexing collection: {}", e.what())); + break; + } + if (iter != iter_end) { path = iter->path(); } + else { break; } + } // if collection or data object + } // for path + } + catch(const std::exception & e) { + rodsLog(LOG_ERROR, "file [%s] function [%s] 
line [%d] General exception - %s ", __FILE__ , __func__, __LINE__, e.what()); + } + catch(...) { + rodsLog(LOG_ERROR, "file [%s] function [%s] line [%d] Unknown error", __FILE__ , __func__, __LINE__); + } } // schedule_policy_events_for_collection /* @@ -418,9 +507,9 @@ namespace irods { void indexer::schedule_metadata_indexing_event( const std::string& _object_path, const std::string& _user_name, - const std::string& _attribute, - const std::string& _value, - const std::string& _units) { + const std::string& _attribute, // Note that _attribute, _value, and _units no longer matter because the metadata indexing operation for + const std::string& _value, // use of the NIEHS elasticsearch schema collects all of an object's AVU's into the same record. + const std::string& _units) { // Every AVU on an object is re-queried and collected into the index, regardless. schedule_policy_events_given_object_path( irods::indexing::operation_type::index, diff --git a/libirods_rule_engine_plugin-indexing.cpp b/libirods_rule_engine_plugin-indexing.cpp index 7c8aa4e..e84453d 100644 --- a/libirods_rule_engine_plugin-indexing.cpp +++ b/libirods_rule_engine_plugin-indexing.cpp @@ -8,6 +8,8 @@ #include "irods_hierarchy_parser.hpp" #include "irods_resource_backport.hpp" #include "rsModAVUMetadata.hpp" +#include "modAccessControl.h" + #define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API #include "filesystem.hpp" @@ -33,6 +35,7 @@ #include #include #include +#include // =-=-=-=-=-=-=- // boost includes @@ -81,6 +84,7 @@ namespace { //- and any subobjects from all indices so computed, even if some of them don't //- refer to the object(s). 
+ auto get_indices_for_delete_by_query (rsComm_t& comm, const std::string &_object_name, const bool recurs) -> std::set { using irods::indexing::parse_indexer_string; @@ -412,6 +416,46 @@ namespace { if (auto* p = getValByKey( &obj_inp->condInput, FORCE_FLAG_KW); p != 0) { rm_force_kw = p; } indices_for_rm_coll = get_indices_for_delete_by_query (*_rei->rsComm, obj_inp->objPath, false); } + else if("pep_api_mod_access_control_post" == _rn ) { + irods::indexing::indexer idx{_rei, config->instance_name_}; + auto it = _args.begin(); + std::advance(it, 2); + if(_args.end() == it) { + THROW( + SYS_INVALID_INPUT_PARAM, + "invalid number of arguments"); + } + try { + const auto* access_ctl = boost::any_cast(*it); + if (access_ctl) { + const auto &[recursive, level_, userN_, zone_, logical_path] = *access_ctl; + + namespace fsvr = irods::experimental::filesystem::server; + if (recursive && fsvr::is_collection(*_rei->rsComm,logical_path)) { + idx.schedule_collection_operation( + irods::indexing::operation_type::index, + logical_path, + _rei->rsComm->clientUser.userName, + "::metadata", //value // -> Empty fields for index name, type, and technology + "" //units // technology will signal that the collection operation + ); // should search upward in the hierarchy for indexing tags. 
+ } + else { + idx.schedule_metadata_indexing_event( + logical_path, + _rei->rsComm->clientUser.userName, + "null", // attribute, // -> the actual values are irrelevant now that we're using the + "bb", // value, // NIEHS metadata schema + "cc" // units + ); + } + } + } + catch(const std::exception &e) { + const char* message = e.what(); + rodsLog(LOG_NOTICE,"Exception during pep_api_mod_access_control_post: %s",message); + } + } else if("pep_api_data_obj_unlink_post" == _rn) { auto it = _args.begin(); std::advance(it, 2); @@ -474,6 +518,21 @@ namespace { idx.schedule_metadata_purge_for_recursive_rm_object(obj_inp->collName, recurseInfo); } } + else if("pep_api_atomic_apply_acl_operations_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + auto request = boost::any_cast(*it); + std::string requ_str {(const char*)request->buf,unsigned(request->len)}; + const auto requ_json = nlohmann::json::parse( requ_str ); + const auto obj_path = requ_json["logical_path"].get(); + irods::indexing::indexer idx{_rei, config->instance_name_}; + idx.schedule_metadata_indexing_event( + obj_path, + _rei->rsComm->clientUser.userName, + "attribute", + "value", + "units"); + } else if (_rn == "pep_api_atomic_apply_metadata_operations_pre" || _rn == "pep_api_atomic_apply_metadata_operations_post") { @@ -520,7 +579,7 @@ namespace { const auto & pre_map = atomic_metadata_tuples[ "pre" ]; set_symmetric_difference ( pre_map.begin(), pre_map.end(), map.cbegin(), map.cend(), std::back_inserter(avus_added_or_removed)); - //dwm- + for (const auto & [attribute, value, units] : avus_added_or_removed) { if (attribute != config->index) { irods::indexing::indexer idx{_rei, config->instance_name_}; @@ -698,6 +757,188 @@ namespace { return retvalue.size() ? retvalue : "application/octet-stream"; } + class permissions_calculator { + + public: + + static std::unique_ptr ptr ; // lazy instantiation + + private: + + // Data structures used in tracking user/group permissions. 
+ + std::map users_{}, groups_{}; + std::map> members_{}; + std::map user_entry_{}; + std::multimap user_perms_{}, group_perms_{}; + std::string owner_{}; + + // idx_ is both: + // - an index into the target permissions array (in the JSON) when filling that element. + // - an internal flag which, when non-zero, means that the user permissions tracking data structures have been computed. + // It is used by `reset_perms', thus also indirectly by `calc_perm_info', to determine the need to re-initialize those + // structures before (re-)computation. + + int idx_{0}; + + rsComm_t *conn{}; + + static std::map perm_names; + + public: + + // Is the given user/group ID actually the ID of a group? + + bool is_group( const std::string& gid ) { + return groups_.find(gid) != groups_.end(); + } + + // Is the given user a member of the given group ? + + bool is_member_of (const std::string& user_id, const std::string& group_id) { + try { + const auto& user_list = members_.at(group_id); + return std::find( user_list.begin(), user_list.end(), user_id) != user_list.end(); + } + catch (const std::out_of_range&) { + irods::log(LOG_ERROR, fmt::format("'{}' is not a group id", group_id)); + } + return false; + } + + void calc_perm_info( const std::string& obj_id, const std::string& obj_type); + + // Helper method. Query the catalog and record the known users, groups, and group memberships (keyed by ID). + + void calc_user_info() { + irods::query q{ conn, "select USER_GROUP_NAME,USER_GROUP_ID,USER_NAME,USER_ID"}; + for (const auto& row : q) { + if (row[1] != row[3]) { + members_[row[1]].push_back(row[3]); + groups_[row[1]]=row[0]; + } + else { + users_[row[3]]=row[2]; + } + } + } + + // Helper method. Reset the data structures that track existing permissions. + + void reset_perms () { + if (idx_ != 0) { + user_entry_ = {}; + group_perms_ = {}; + user_perms_ = {}; + owner_ = {}; + } + idx_ = 0; + } + + public: + + // Constructor used here to make the global object. 
Calculates existing users, groups, and member + // relationships. + + permissions_calculator(rsComm_t *_conn) + : conn{_conn} + { + calc_user_info(); + } + + // Copy constructor, preserves user, group and is-a-member information, but resets other data structures + // in preparation for recomputing permissions info. + + permissions_calculator(const permissions_calculator& x, rsComm_t *_conn) + : users_{x.users_} + , groups_{x.groups_} + , members_{x.members_} + , conn{_conn} + { + reset_perms(); + } + + // Calculate permissions for the object of the given ID, and fill the nlohmann::json struct with the results. + + void get_perms_list(nlohmann::json & j, const std::string & obj_id, const std::string & obj_type) + { + calc_perm_info( obj_id, obj_type); // idx_ member will be zero after this call. + + for (const auto & [pm,gid] : group_perms_) { + j["userPermissions"][idx_]["permission"] = perm_names.at(pm); + j["userPermissions"][idx_++]["user"] = groups_[gid]; + } + for (const auto & [pm,uid] : user_perms_) { + j["userPermissions"][idx_]["permission"] = perm_names.at(pm); + j["userPermissions"][idx_++]["user"] = users_[uid]; + } + j["creator"] = owner_; + idx_ = -1; // force structures to be reinitialized on reuse + } + }; + + // Permission codes and the corresponding strings + + std::map permissions_calculator::perm_names { + { 1050 , "read" }, + { 1120 , "write" }, + { 1200 , "own"} + }; + + // Existing instance of the calculator. Initializes the first time it's needed in the lifetime + // of the iRODS agent. + + std::unique_ptr permissions_calculator::ptr {}; + + // Calculate ownership and permissions for the object of the given ID + // Note obj_type should be either "DATA" or "COLL". + + void permissions_calculator::calc_perm_info(const std::string& obj_id, const std::string& obj_type) + { + reset_perms(); // reset the variables used to calculate owner, user_perms and group_perms + // for later conversion to JSON for indexing. 
+ + /* calculate: + owner (aka the creator of the object) */ + irods::query qown { conn, boost::str(boost::format("select %s_OWNER_NAME where %s_ID = '%s'") + % obj_type % obj_type % obj_id ) }; + for (const auto & row : qown) { + owner_ = row[0]; + break; + } + + /* calculate: + group_perms - maps the reported group permissions to the corresponding group IDs + user_entry - maps the IDs of users which are *not* groups to the permission levels reported for them + */ + irods::query qprm { conn, boost::str(boost::format("select %s_ACCESS_TYPE,%s_ACCESS_USER_ID where %s_ID = '%s'") + % obj_type % obj_type % obj_type % obj_id ) }; + for (const auto& i : qprm) { + const auto iperm=std::stol(i[0]); + if (is_group(i[1])) { + group_perms_.insert(make_pair(iperm, i[1])); + } + else { + user_entry_[i[1]] = iperm; + } + } + + /* calculate: + user_perms - maps reported permissions to the IDs of the corresponding users (guaranteed not to be groups) + + This is done by scanning group_perms to ensure each user in question is not a member of a group having equal + or higher privilege. 
+ */ + for (const auto & [uid,iperm] : user_entry_) { + bool include_user = true; + for (auto it = group_perms_.lower_bound(iperm); it!= group_perms_.end(); it++) { + if (is_member_of(uid,it->second)) { include_user = false; break; } + } + if (include_user) { user_perms_.insert( make_pair(iperm, uid)); } + } + // group_perms and user_perms are now ready for storing into the metadata index + } + auto get_system_metadata( ruleExecInfo_t* _rei ,const std::string& _obj_path ) -> nlohmann::json { using nlohmann::json; @@ -715,6 +956,13 @@ namespace { obj["absolutePath"] = _obj_path; bool is_collection = false; + + if (!permissions_calculator::ptr) { + permissions_calculator::ptr.reset( new permissions_calculator{_rei->rsComm} ); + } + permissions_calculator x{*permissions_calculator::ptr, _rei->rsComm}; + + std::string id {}; if (fsvr::is_data_object(s)) { query_str = fmt::format("SELECT DATA_ID , DATA_MODIFY_TIME, DATA_ZONE_NAME, COLL_NAME, DATA_SIZE where DATA_NAME = '{0}'" " and COLL_NAME = '{1}' ", name, parent_name ); @@ -725,8 +973,10 @@ namespace { obj["parentPath"] = i[3]; obj["dataSize"] = std::stol( i[4] ); obj["isFile"] = true; + id = i[0]; break; } + x.get_perms_list(obj, id, "DATA" ); } else if (fsvr::is_collection(s)) { is_collection = true; @@ -739,8 +989,10 @@ namespace { obj["parentPath"] = i[3]; obj["dataSize"] = 0L; obj["isFile"] = false; + id = i[0]; break; } + x.get_perms_list(obj, id, "COLL" ); } auto fileName = obj ["fileName"] = irods_path.object_name(); obj ["url"] = fmt::format(config->urlTemplate, _obj_path); @@ -811,11 +1063,14 @@ irods::error rule_exists( "pep_api_data_obj_unlink_post", "pep_api_mod_avu_metadata_pre", "pep_api_mod_avu_metadata_post", + "pep_api_atomic_apply_acl_operations_post", "pep_api_data_obj_close_post", "pep_api_data_obj_put_post", "pep_api_phy_path_reg_post", "pep_api_rm_coll_pre", "pep_api_rm_coll_post", + "pep_api_mod_access_control_pre", + "pep_api_mod_access_control_post", }; _ret = rules.find(_rn) != rules.end(); 
diff --git a/packaging/atomic_acl_ops.py b/packaging/atomic_acl_ops.py new file mode 100755 index 0000000..d92b4ae --- /dev/null +++ b/packaging/atomic_acl_ops.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +from __future__ import print_function +import sys +import json +import getopt +from os.path import (join, abspath) + +opt,args = getopt.getopt(sys.argv[1:],'v:') +optD = dict(opt) +activate_path = optD.get('-v') +if activate_path is not None: + if '/bin/activate' not in activate_path: + activate_path = join(activate_path, 'bin/activate_this.py') + activate_path = abspath( activate_path ) + exec(open(activate_path).read(), {'__file__': activate_path}) + +from irods.message import iRODSMessage, JSON_Message +from irods.test.helpers import (home_collection, make_session) + +def call_json_api(sess, request_text, api_number): + with sess.pool.get_connection() as conn: + request_msg = iRODSMessage("RODS_API_REQ", JSON_Message( request_text, conn.server_version ), int_info=api_number) + conn.send( request_msg ) + response = conn.recv() + response_msg = response.get_json_encoded_struct() + if response_msg: + print("in atomic apply ACL api, server responded with: %r"%response_msg, file = sys.stderr) + + +def usage(program = sys.argv[0], stream = sys.stderr): + print('{program} logical_path [user1 access1] [user2 access2] ... 
'.format(**locals()), file = stream) + exit(1) + +if __name__ == '__main__': + + try: + logical_path = args.pop(0) + except IndexError: + usage() + + request = {"logical_path": logical_path, + "operations": [ ] + } + + with make_session() as ses: + while len(args) > 0: + try: + username,access = args[:2] + except ValueError: + usage() + del args[:2] + request["operations"].append({"entity_name": username, + "acl": access }) + call_json_api(ses, request, api_number = 20005) # ATOMIC_APPLY_ACL_OPERATIONS_APN + diff --git a/packaging/atomic_metadata_ops.py b/packaging/atomic_metadata_ops.py index f8874fe..4352ef1 100755 --- a/packaging/atomic_metadata_ops.py +++ b/packaging/atomic_metadata_ops.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from __future__ import print_function import re diff --git a/packaging/test_plugin_indexing.py b/packaging/test_plugin_indexing.py index 475fa2b..f748326 100644 --- a/packaging/test_plugin_indexing.py +++ b/packaging/test_plugin_indexing.py @@ -11,8 +11,9 @@ import zipfile import subprocess -from time import sleep +from time import sleep, time from textwrap import dedent +from datetime import datetime as _datetime if sys.version_info >= (2, 7): import unittest @@ -242,6 +243,31 @@ def install_python3_virtualenv_with_python_irodsclient(PATH='~/py3',preTestPRCIn # Assuming use for metadata style of index only +def search_index_for_userPermissions_user_name(index_name, user_name, port = ELASTICSEARCH_PORT): + maptype = "" if es7_or_later() else "/text" + track_num_hits_as_int = "&track_total_hits=true&rest_total_hits_as_int=true" if es7_exactly() else "" + out,_,rc = lib.execute_command_permissive( dedent("""\ + curl -X GET -H'Content-Type: application/json' HTTP://localhost:{port}/{index_name}{maptype}/_search?pretty=true{track_num_hits_as_int} -d ' + {{ + "from": 0, "size" : 500, + "_source" : ["absolutePath", "userPermissions"], + "query" : {{ + "nested": {{ + "path": "userPermissions", + "query": {{ + "bool": {{ 
+ "must": [ + {{ "match": {{ "userPermissions.user": "{user_name}" }} }} + ] + }} + }} + }} + }} + }}' """).format(**locals())) + if rc != 0: out = None + return out + + def search_index_for_avu_attribute_name(index_name, attr_name, port = ELASTICSEARCH_PORT): maptype = "" if es7_or_later() else "/text" track_num_hits_as_int = "&track_total_hits=true&rest_total_hits_as_int=true" if es7_exactly() else "" @@ -345,6 +371,20 @@ def create_metadata_index(index_name = DEFAULT_METADATA_INDEX, port = ELASTICSEA "type": "date", "format": "epoch_second" }, + "creator": { + "type": "keyword" + }, + "userPermissions": { + "type": "nested", + "properties": { + "permission": { + "type": "keyword" + }, + "user": { + "type": "keyword" + } + } + }, "metadataEntries": { "type": "nested", "properties": { @@ -741,6 +781,92 @@ def test_fulltext_and_metadata_indicators_on_same_colln__19(self): delete_metadata_index() #test_session.assert_icommand('irm -fr '+test_path, 'STDOUT', '') + @staticmethod + def test_coll_name(): + return '{:test_%s_%f}'.format(_datetime.now()) + + def test_indexing_permissions__81(self): + with session.make_session_for_existing_admin() as admin_session,\ + indexing_plugin__installed(indexing_config = {'minimum_delay_time':'1', 'maximum_delay_time':'3'}): + # - Define collections to be used for test. + path_to_home = '/{0.zone_name}/home/{0.username}'.format(admin_session) + test_path = path_to_home + "/" + self.test_coll_name() + # STRUCTURE: (HOME)-->test_dir/ + # chmod user0 data1: +->dir1(*)-->data1 + #--------------------- | nb: (*) index all sub-objects to DEFAULT_METADATA_INDEX + # chmod -r user1 dir2: +-->dir2(**)-->dir3(*)-->data3 (**) index all sub-objects to INDEX_2 + # +-->data2 + sub_path1 = test_path + "/dir1" + sub_path2 = test_path + "/dir2" + sub_path3 = test_path + "/dir2/dir3" + data_1 = sub_path1 + "/data1" + data_2 = sub_path2 + "/data2" + data_3 = sub_path3 + "/data3" + # - Define another index. 
+ INDEX_2 = DEFAULT_METADATA_INDEX+"_2" + try: + + # use two indices, to test that multiple index indicators are respected in recursive (-r) ichmod + create_metadata_index(DEFAULT_METADATA_INDEX) + create_metadata_index(INDEX_2) + + # - Set up a test environment. + objects = [ + ('create', ['-C',{'path':test_path, 'delete_tree_when_done': True }]), + ('create', ['-C',{'path':sub_path1 }]), #--> for ichmod user0 without '-r', on data_1 object only + ('create', ['-C',{'path':sub_path2 }]), #--> for ichmod -r user1 - root collection + ('create', ['-C',{'path':sub_path3 }]), # - child collection + ('add_AVU_ind', ['-C',{'path':sub_path1, 'index_name': DEFAULT_METADATA_INDEX, 'index_type':'metadata'}]), + ('add_AVU_ind', ['-C',{'path':sub_path2, 'index_name': INDEX_2, 'index_type':'metadata'}]), + ('add_AVU_ind', ['-C',{'path':sub_path3, 'index_name': DEFAULT_METADATA_INDEX, 'index_type':'metadata'}]), + ('create', ['-d',{'path':data_1, 'content':'abc'}]), + ('create', ['-d',{'path':data_2, 'content':'def'}]), + ('create', ['-d',{'path':data_3, 'content':'ghi'}]), + ('sleep_for', ['',{'seconds':5}]), + ('wait_for', [self.delay_queue_is_empty, {'num_iter':45,'interval':2.125,'threshold':2}]), + ] + + # - Enact test environment. + with self.logical_filesystem_for_indexing (objects,admin_session): + + # - Convenience functions. + expected_condition = lambda string: (operator.ge,1) if string != 'null' else (operator.eq,0) + num_hits_multiplier = lambda string: 1 if string != 'null' else 0 + + # - On first pass, add ACLs; on the second, enull them. Index and test each time. + + for R_perm,W_perm in [('read','write'),('null','null')]: + + hits_ = ([],[],[]) + + # - Non-recursive ichmod and wait for results. 
+ admin_session.assert_icommand('ichmod {0} {1} {2}'.format(R_perm,self.user0.username,data_1)) + rep_result_0 = repeat_until(*expected_condition(R_perm), transform=make_number_of_hits_fcn(hits_[0]) + ) (search_index_for_userPermissions_user_name) (DEFAULT_METADATA_INDEX,self.user0.username) + + # - Recursive ichmod and wait for results. + admin_session.assert_icommand('ichmod -r {0} {1} {2}'.format(W_perm,self.user1.username,sub_path2)) + rep_result_1 = repeat_until(*expected_condition(W_perm), transform=make_number_of_hits_fcn(hits_[1]) + ) (search_index_for_userPermissions_user_name) (INDEX_2,self.user1.username) + rep_result_2 = repeat_until(*expected_condition(W_perm), transform=make_number_of_hits_fcn(hits_[2]) + ) (search_index_for_userPermissions_user_name) (DEFAULT_METADATA_INDEX,self.user1.username) + + # - Assert all conditions waited on were met. + self.assertTrue(all([rep_result_0, rep_result_1, rep_result_2])) + + # - Assert the expected number of index hits. + nhits = lambda array,user: len(list(filter((lambda _:any(e for e in _['_source']['userPermissions'] if e['user'] == user)), + array[0]['hits']['hits'] ))) + self.assertEqual( num_hits_multiplier(R_perm)*1, nhits(hits_[0], self.user0.username) ) + self.assertEqual( num_hits_multiplier(W_perm)*4, nhits(hits_[1], self.user1.username) ) + self.assertEqual( num_hits_multiplier(W_perm)*2, nhits(hits_[2], self.user1.username) ) + finally: + # - Clean up + delete_metadata_index(DEFAULT_METADATA_INDEX) + delete_metadata_index(INDEX_2) + + + def test_indexing_of_odd_chars_and_json_in_metadata__41__43(self): with session.make_session_for_existing_admin() as admin_session: self.remove_all_jobs_from_delay_queue(admin_session) @@ -820,6 +946,31 @@ def logical_filesystem_for_indexing(self,objects,session): for p in collections_to_delete: session.assert_icommand(['irm', '-rf', p],'STDOUT','') + def test_indexing_with_atomic_acl_ops__81(self): + with indexing_plugin__installed(indexing_config = 
{'minimum_delay_time':'1', 'maximum_delay_time':'9'}): + test_coll = self.test_coll_name() + INDEX = DEFAULT_METADATA_INDEX + "__81_{:%s.%f}".format(_datetime.now()) + create_metadata_index (INDEX) + try: + with session.make_session_for_existing_admin() as admin_session: + admin_session.assert_icommand("imkdir {0}".format(test_coll)) + admin_session.assert_icommand("itouch {0}/testobj".format(test_coll)) + admin_session.assert_icommand("imeta set -C {0} irods::indexing::index {1}::metadata elasticsearch".format(test_coll, INDEX)) + sleep(5) + # hit the apply_atomic_acl api + self.assertIsNotNone (repeat_until (operator.eq, True) (self.delay_queue_is_empty) (admin_session)) + admin_session.assert_icommand("""cd ${{HOME}} ; python3 ~/scripts/irods/test/atomic_acl_ops.py -v {self.venv_dir} """ + """ '/{0.zone_name}/home/{0.username}'/{test_coll}/testobj {self.user0.username} write """ + """ {self.user1.username} read """.format(admin_session, **locals()), use_unsafe_shell=True) + # Check new permissions are reflected in the index + self.assertIsNotNone( repeat_until(operator.eq, 1, transform=number_of_hits, + ) (search_index_for_userPermissions_user_name) (INDEX, self.user0.username)) + self.assertEqual(1, number_of_hits(search_index_for_userPermissions_user_name (INDEX, self.user1.username))) + finally: + delete_metadata_index (INDEX) + with session.make_session_for_existing_admin() as admin_session: + admin_session.assert_icommand("irm -fr {0}".format(test_coll)) + def test_indexing_with_atomic_metadata_ops_66(self): with indexing_plugin__installed(): test_coll = 'testcoll_66' diff --git a/path_calc.hpp b/path_calc.hpp new file mode 100644 index 0000000..697f94a --- /dev/null +++ b/path_calc.hpp @@ -0,0 +1,95 @@ +#ifndef IRODS_INDEXING_PATH_CALC_HPP +#define IRODS_INDEXING_PATH_CALC_HPP + +#include +#include +#include +#include + +namespace irods { + namespace indexing { + + // Thrown by class path_calc_and_cache, when given a path + // that is not well-formed. 
// Thrown by class path_calc_and_cache when given a path that is not
// well-formed. This results from any attempt to ascend a collection
// hierarchy that does not start with "/".
class path_format_error : public std::runtime_error {
public:
    path_format_error(const std::string& s) : std::runtime_error(s) {}
};

// Thrown by class path_calc_and_cache when a freshly computed value could not
// be stored in its internal std::map. calc() only attempts the insertion after
// a lookup for the same key has missed, so this is not expected to fire.
class path_property_caching_error : public std::runtime_error {
public:
    path_property_caching_error(const std::string& s) : std::runtime_error(s) {}
};

// Computes and stores the cumulative set of items, of the given Property type,
// that are applicable at a given level of the iRODS collection hierarchy.
//
// Each path's own contribution is obtained from a user-supplied callable and
// memoized, so the callable runs at most once per distinct path string.
template <typename Property>
class path_calc_and_cache {
public:
    using properties = std::set<Property>;

private:
    // Memoized per-path contributions, keyed by the exact path string.
    std::map<std::string, properties> path_properties;

    using calc_function = std::function<properties(const std::string&)>;
    calc_function calc_;

public:
    // Initialize with a callable object that returns the set of Property
    // values contributed by a single collection path within the hierarchy.
    explicit path_calc_and_cache(calc_function f)
        : calc_{f}
    {
    }

    // Return the union of the contributions of 'path' and of every ancestor
    // up to and including "/".
    //
    // Ancestors are visited first (root downwards), so nothing is computed or
    // cached for a path that turns out to be malformed.
    //
    // Throws path_format_error if the ascent reaches a component that contains
    // no "/" (including the empty string), i.e. 'path' does not start at "/".
    properties accum(const std::string& path)
    {
        // rfind() yields npos for an empty string as well as for a
        // slash-free one, so a single test covers both malformed cases.
        const auto last_slash = path.rfind('/');
        if (last_slash == std::string::npos) {
            throw path_format_error{"Couldn't parse: " + path};
        }
        if (path == "/") {
            return calc("/");
        }

        // The parent of a first-level entry such as "/a" is "/" itself, so
        // keep the leading slash instead of truncating to an empty string.
        const auto parent_length = (last_slash == 0) ? 1 : last_slash;
        auto accumulated = accum(path.substr(0, parent_length));

        const auto& own = calc(path);
        accumulated.insert(own.begin(), own.end());
        return accumulated;
    }

    // Return the individual contribution of 'path' alone (no ancestors),
    // using the cached value if one exists and computing it otherwise.
    //
    // The returned reference points into the cache. Entries are never erased
    // by this class and std::map insertion does not invalidate references, so
    // it stays usable across later calls on the same object.
    //
    // Throws path_property_caching_error if the new value cannot be stored.
    const properties& calc(const std::string& path)
    {
        const auto cached = path_properties.find(path);
        if (cached != path_properties.end()) {
            return cached->second;
        }

        // calc_(path) is evaluated before emplace() runs, so an exception
        // escaping the callable leaves the cache unmodified.
        const auto stored = path_properties.emplace(path, calc_(path));
        if (!stored.second) {
            throw path_property_caching_error{"Couldn't cache properties for path: " + path};
        }
        return stored.first->second;
    }

}; // class path_calc_and_cache