Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions pytest-Tests/hdfs/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Pytest environment setup for the HDFS encryption test suite.
# Configures the Hadoop container (adds the KMS key-provider property and
# forces simple authentication in core-site.xml) and creates the test users
# before any test runs.

import docker
import pytest
import time
from test_config import (HADOOP_CONTAINER, HDFS_USER,KMS_PROPERTY,CORE_SITE_XML_PATH)

# Docker client bound to the local daemon; created once at import time and
# shared by every fixture/helper in this conftest.
client = docker.from_env()

@pytest.fixture(scope="session")
def hadoop_container():
    """Session-scoped handle to the running Hadoop Docker container."""
    return client.containers.get(HADOOP_CONTAINER)

def ensure_key_provider_and_simple_auth(container) -> bool:
    """
    Make sure core-site.xml inside *container* declares:
      1) the KMS key-provider property (KMS_PROPERTY), and
      2) hadoop.security.authentication = simple.
    Returns True when the file was modified (caller should restart the container).

    NOTE(review): step 2 marks the file as modified in both branches, so this
    function always returns True once it reaches step 2 — the container is
    restarted every session even when nothing actually changed. Confirm that
    this is intended.
    """
    def _root(cmd):
        # Every query/edit of core-site.xml runs as root inside the container.
        return container.exec_run(cmd, user="root")

    modified = False

    # 1) Insert the KMS provider property unless it is already present.
    status, _ = _root(f"grep -q 'hadoop.security.key.provider.path' {CORE_SITE_XML_PATH}")
    if status != 0:
        # Insert the single-line property just before </configuration>.
        _root(f"sed -i '/<\\/configuration>/i {KMS_PROPERTY}' {CORE_SITE_XML_PATH}")
        modified = True

    # 2) Force simple authentication: rewrite the value in place when the
    #    property exists, otherwise append a fresh property element.
    status, _ = _root(f"grep -q '<name>hadoop.security.authentication</name>' {CORE_SITE_XML_PATH}")
    if status == 0:
        # Within the <property>...</property> span for this name, replace
        # whatever <value> holds with "simple".
        replace_cmd = (
            "sed -i "
            "'/<name>hadoop.security.authentication<\\/name>/,/<\\/property>/ "
            "s/<value>[^<]*<\\/value>/<value>simple<\\/value>/' "
            f"{CORE_SITE_XML_PATH}"
        )
        _root(replace_cmd)
    else:
        auth_prop = (
            "<property><name>hadoop.security.authentication</name>"
            "<value>simple</value></property>"
        )
        _root(f"sed -i '/<\\/configuration>/i {auth_prop}' {CORE_SITE_XML_PATH}")
    modified = True

    return modified

def ensure_user_exists(container, username: str) -> None:
    """Create *username* inside the container (home dir, bash shell, added to
    the ``hadoop`` group) if it does not already exist; no-op otherwise."""
    status, _ = container.exec_run(f"id -u {username}", user="root")
    if status != 0:
        # User is missing: create it, then put it in the hadoop group.
        for cmd in (
            f"useradd -m -s /bin/bash {username}",
            f"usermod -aG hadoop {username}",
        ):
            container.exec_run(cmd, user="root")


@pytest.fixture(scope="session", autouse=True)
def setup_environment(hadoop_container):
    """Autouse session fixture: align core-site.xml with the KMS + simple-auth
    setup, restart the container when it changed, then prepare the keyadmin
    user and take HDFS out of safemode before any test runs."""
    if ensure_key_provider_and_simple_auth(hadoop_container):
        hadoop_container.restart()
        # Wait for container to restart and services to come up.
        time.sleep(30)

    ensure_user_exists(hadoop_container, "keyadmin")
    hadoop_container.exec_run("hdfs dfsadmin -safemode leave", user=HDFS_USER)

    yield
95 changes: 95 additions & 0 deletions pytest-Tests/hdfs/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


This directory contains the pytest suite for the HDFS encryption cycle.
See the sections below for the directory layout and a summary of each
test module.
-->

# Main directory for testing the HDFS encryption cycle

## Structure
```
test_hdfs/
├── test_encryption.py
├── test_encryption02.py
├── test_encryption03.py
├── test_config.py #stores all constants and HDFS commands
├── conftest.py #sets up the environment
├── utils.py #utility methods

```

---

## Features

- **Markers:**
Markers can be used to selectively run specific test cases, improving test efficiency and organization.

---

### `setup_environment`

Handled in `conftest.py` file
Before running the test cases, some environment configurations are needed:
- HDFS must communicate with KMS to fetch key details.
- Specific KMS properties are added to the `core-site.xml` file.
- Containers are restarted to apply the changes effectively.

---

### Utility Methods

- **get_error_logs:**
Fetches logs from both KMS and HDFS containers. Helps in identifying issues when errors or exceptions occur during testing.

- **run_command:**
Executes all necessary HDFS commands inside the containers.

---

## `test_encryption.py`

Handles the **full HDFS encryption cycle**, including setup, positive and negative test scenarios, and cleanup.

### Main Highlights:
- Encryption Zone (EZ) creation in HDFS.
- Granting permissions to specific users for read/write operations within the EZ.
- Validating read/write attempts by unauthorized users inside the EZ.


## `test_encryption02.py`

Covers key roll-over and key-deletion scenarios:
- **Verify that files written before a key roll-over can still be read.**
- **Verify that new files can be written and read after a key roll-over.**
- **Verify the read behavior of a file after its key has been deleted.**

---

## `test_encryption03.py`

Contains the test cases for **cross encryption zone operations**.


## Summary

This test suite ensures that **HDFS encryption and access control mechanisms** function as expected, validating both authorized and unauthorized access scenarios.
100 changes: 100 additions & 0 deletions pytest-Tests/hdfs/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Constants used by the HDFS encryption tests: user names, container names,
# KMS/HDFS configuration paths, and the shell command templates the tests
# format and execute inside the containers.

# Constant values for the suite: users, HTTP settings, paths, and templated
# HDFS commands (callers fill the {key_name}/{ez_name}/{user}/... slots with
# str.format before running them inside the containers).
# NOTE(review): the literal "(unknown)" in the file names below looks like a
# placeholder that lost its original value -- confirm the intended file name.

HDFS_USER = "hdfs"
HIVE_USER = "hive"
HBASE_USER= "hbase"
KEY_ADMIN="keyadmin"
# Default HTTP headers / query parameters for the KMS REST API.
HEADERS={"Content-Type": "application/json","Accept":"application/json"}
PARAMS={"user.name":"keyadmin"}
BASE_URL="http://localhost:9292/kms/v1"

# Names of the Docker containers the fixtures attach to.
HADOOP_CONTAINER = "ranger-hadoop"
KMS_CONTAINER = "ranger-kms"

# KMS config that needs to be added to core-site.xml; kept on a single line so
# it can be inserted with sed -- add more properties here if needed.
KMS_PROPERTY = """<property><name>hadoop.security.key.provider.path</name><value>kms://[email protected]:9292/kms</value></property>"""

CORE_SITE_XML_PATH = "/opt/hadoop/etc/hadoop/core-site.xml"
HADOOP_NAMENODE_LOG_PATH="/opt/hadoop/logs/hadoop-hdfs-namenode-ranger-hadoop.rangernw.log"
KMS_LOG_PATH="/var/log/ranger/kms/ranger-kms-ranger-kms.rangernw-root.log"


# HDFS command templates ------------------------------------------------------

# Create / list encryption keys against the KMS provider.
CREATE_KEY_COMMAND = "hadoop key create {key_name} -size 128 -provider kms://[email protected]:9292/kms"

VALIDATE_KEY_COMMAND = "hadoop key list -provider kms://[email protected]:9292/kms"

# Create an encryption zone (EZ) backed by {key_name} and list zones to verify.
CREATE_EZ_COMMANDS = [
    "hdfs dfs -mkdir /{ez_name}",
    "hdfs crypto -createZone -keyName {key_name} -path /{ez_name}",
    "hdfs crypto -listZones"
]

# Restrict the EZ to {user} (mode 700, owned by {user}:{user}).
GRANT_PERMISSIONS_COMMANDS = [
    "hdfs dfs -chmod -R 700 /{ez_name}",
    "hdfs dfs -chown -R {user}:{user} /{ez_name}"
]

# Create a local test file in the user's home directory and confirm it exists.
CREATE_FILE_COMMAND = [ 'echo "{filecontent}" > /home/{user}/(unknown).txt && ls -l /home/{user}/(unknown).txt' ]

# Upload the local file into the EZ, then list and read it back.
ACTIONS_COMMANDS = [
    "hdfs dfs -put /home/{user}/(unknown).txt /{ez_name}/",
    "hdfs dfs -ls /{ez_name}/",
    "hdfs dfs -cat /{ez_name}/(unknown).txt"
]

# Same as ACTIONS_COMMANDS but targeting a subdirectory, for cross-EZ tests.
CROSS_EZ_ACTION_COMMANDS = [
    "hdfs dfs -put /home/{user}/(unknown).txt /{ez_name}/{dirname}/",
    "hdfs dfs -ls /{ez_name}/",
    "hdfs dfs -cat /{ez_name}/{dirname}/(unknown).txt"
]

READ_EZ_FILE=[
    "hdfs dfs -cat /{ez_name}/(unknown).txt"
]

READ_EZ = [
    "hdfs dfs -cat /{ez_name}/"
]

# Commands issued as a user WITHOUT access to the EZ (expected to fail).
UNAUTHORIZED_WRITE_COMMAND = 'hdfs dfs -put /home/{user}/(unknown).txt /{ez_name}/'

UNAUTHORIZED_READ_COMMAND = "hdfs dfs -cat /{ez_name}/(unknown).txt"

# Cleanup: remove the test file and/or the whole encryption zone.
CLEANUP_COMMANDS = [
    "hdfs dfs -rm /{ez_name}/(unknown).txt",
    "hdfs dfs -rm -R /{ez_name}"
]
CLEANUP_EZ = [
    "hdfs dfs -rm -R /{ez_name}"
]
CLEANUP_EZ_FILE = [
    "hdfs dfs -rm /{ez_name}/(unknown).txt"
]
# Key deletion prompts for confirmation, so pipe "Y" in via bash.
KEY_DELETION_CMD = "bash -c \"echo 'Y' | hadoop key delete {key_name} -provider kms://[email protected]:9292/kms\""

Loading