From 9f0529327976ab7dca31d8b0a58c22357baeaba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 11:13:36 +0800 Subject: [PATCH 1/8] add actions for pre-commit and ci test --- .github/CODEOWNERS | 2 ++ .github/actions/atorch-pre-commit/action.yml | 11 ++++++++ .github/actions/atorch-python-test/action.yml | 17 ++++++++++++ .github/workflows/main.yml | 26 +++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 .github/CODEOWNERS create mode 100644 .github/actions/atorch-pre-commit/action.yml create mode 100644 .github/actions/atorch-python-test/action.yml create mode 100644 .github/workflows/main.yml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..32c3675 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# root directory +* @skydoorkai @adamantboy @hxdtest @nash635 diff --git a/.github/actions/atorch-pre-commit/action.yml b/.github/actions/atorch-pre-commit/action.yml new file mode 100644 index 0000000..b3143eb --- /dev/null +++ b/.github/actions/atorch-pre-commit/action.yml @@ -0,0 +1,11 @@ +--- +name: atorch-pre-commit +description: run pre-commit to check codes for atorch +runs: + using: 'docker' + image: "easydl/atorch:aci" + args: + - "/bin/bash" + - "-c" + - "cd atorch \ +&& sh dev/scripts/pre-commit.sh" diff --git a/.github/actions/atorch-python-test/action.yml b/.github/actions/atorch-python-test/action.yml new file mode 100644 index 0000000..b0c631f --- /dev/null +++ b/.github/actions/atorch-python-test/action.yml @@ -0,0 +1,17 @@ +--- +name: atorch-python-test +description: run pytest to execute python test cases of atorch python +runs: + using: 'docker' + image: "registry.cn-hangzhou.aliyuncs.com/atorch/atorch-open-20240430:pt210" + args: + - "/bin/bash" + - "-c" + - "pip install dlrover[torch]==0.3.8 --no-deps \ +&& echo -e 'import math\ninf = math.inf\nnan = math.nan\nstring_classes = \ +(str, bytes)' > /opt/conda/lib/python3.8/site-packages/torch/_six.py \ 
+&& cd atorch \ +&& PYTHONPATH=. pytest atorch/tests/common_tests \ +&& cd .. \ +&& git config --global --add safe.directory /github/workspace \ +&& git clean -xdf" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..52cdaad --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,26 @@ +--- +name: CI + +on: + pull_request: + workflow_dispatch: + push: + branches: [master] + +jobs: + python-test: + runs-on: self-hosted + steps: + # This step checks out a copy of your repository. + - uses: actions/checkout@v3 + with: + clean: false + # This step references the directory that contains the action. + - uses: ./.github/actions/atorch-python-test + pre-commit: + runs-on: ubuntu-latest + steps: + # This step checks out a copy of your repository. + - uses: actions/checkout@v3 + # This step references the directory that contains the action. + - uses: ./.github/actions/atorch-pre-commit From 7a0a35a17365c5c03e988e8e7ab1711a12907f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 11:19:13 +0800 Subject: [PATCH 2/8] add pull request template --- .github/pull_request_template.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..8c8d451 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,15 @@ +### What changes were proposed in this pull request? + +Please describe the changes you have made or proposed in this pull request. + +### Why are the changes needed? + +Explain the purpose or motivation behind these changes. What problem are you trying to solve? + +### Does this PR introduce any user-facing change? + +Specify whether this pull request introduces any changes that users will directly interact with or notice. + +### How was this patch tested? 
+ +Detail the testing process you have undertaken to ensure the changes in this pull request are valid and working as intended. From 5f76f221c71e9426ddf5df5ce05045266e1e093e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 11:36:23 +0800 Subject: [PATCH 3/8] add issue template --- .github/ISSUE_TEMPLATE/bug_report.md | 39 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ .github/ISSUE_TEMPLATE/question.md | 10 ++++++ 3 files changed, 69 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/question.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..04ce096 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,39 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: report +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the unexpected case: +1. What kind of training? [e.g. FSDP] +2. The command used? [e.g. dlrover-run xxxxx xxxx] +3. When and where? +4. See error + +**Logs or Screenshots** +Logs (necessary) or screenshots to help explain your problem. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**APP Info (please complete the following information):** + - DLRover: [e.g. 0.3.8] + - Torch: [e.g. 2.1.2] + +**ENV Info (please complete the following information):** + - Platform: [e.g. ubuntu xxx] + - Python: [e.g. 3.8.1] + - GRPC: [e.g. 1.5.x] + +**HARDWARE Info (please complete the following information):** + - Device: [e.g. GPU A100 / NPU Ascend 910] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000..62fcf82 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: Question +about: For questions. 
+title: '' +labels: question +assignees: '' + +--- + + From 51e0617bd6eaa2ffdbfc16515a9e2a35ca225d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 11:42:47 +0800 Subject: [PATCH 4/8] fix dir --- .github/actions/atorch-pre-commit/action.yml | 1 - .github/actions/atorch-python-test/action.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/actions/atorch-pre-commit/action.yml b/.github/actions/atorch-pre-commit/action.yml index b3143eb..835e74c 100644 --- a/.github/actions/atorch-pre-commit/action.yml +++ b/.github/actions/atorch-pre-commit/action.yml @@ -7,5 +7,4 @@ runs: args: - "/bin/bash" - "-c" - - "cd atorch \ && sh dev/scripts/pre-commit.sh" diff --git a/.github/actions/atorch-python-test/action.yml b/.github/actions/atorch-python-test/action.yml index b0c631f..f6b51da 100644 --- a/.github/actions/atorch-python-test/action.yml +++ b/.github/actions/atorch-python-test/action.yml @@ -10,7 +10,6 @@ runs: - "pip install dlrover[torch]==0.3.8 --no-deps \ && echo -e 'import math\ninf = math.inf\nnan = math.nan\nstring_classes = \ (str, bytes)' > /opt/conda/lib/python3.8/site-packages/torch/_six.py \ -&& cd atorch \ && PYTHONPATH=. pytest atorch/tests/common_tests \ && cd .. 
\ && git config --global --add safe.directory /github/workspace \ From ab7593104a3cd308f06e81cc49f12132378530f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 11:47:29 +0800 Subject: [PATCH 5/8] fix action --- .github/actions/atorch-pre-commit/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/atorch-pre-commit/action.yml b/.github/actions/atorch-pre-commit/action.yml index 835e74c..c942caf 100644 --- a/.github/actions/atorch-pre-commit/action.yml +++ b/.github/actions/atorch-pre-commit/action.yml @@ -7,4 +7,4 @@ runs: args: - "/bin/bash" - "-c" -&& sh dev/scripts/pre-commit.sh" + - "sh dev/scripts/pre-commit.sh" From 96aeb3f027e745346b76518848c19bc3ef67db9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 12:39:19 +0800 Subject: [PATCH 6/8] update dlrover version --- .github/actions/atorch-python-test/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/atorch-python-test/action.yml b/.github/actions/atorch-python-test/action.yml index f6b51da..975adec 100644 --- a/.github/actions/atorch-python-test/action.yml +++ b/.github/actions/atorch-python-test/action.yml @@ -7,7 +7,7 @@ runs: args: - "/bin/bash" - "-c" - - "pip install dlrover[torch]==0.3.8 --no-deps \ + - "pip install dlrover[torch]==0.4.0 --no-deps \ && echo -e 'import math\ninf = math.inf\nnan = math.nan\nstring_classes = \ (str, bytes)' > /opt/conda/lib/python3.8/site-packages/torch/_six.py \ && PYTHONPATH=. 
pytest atorch/tests/common_tests \ From 2d0a44ec4761c7d3739738be7afc83ab1add8a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 12:46:51 +0800 Subject: [PATCH 7/8] change link / script for atorch --- README.md | 10 +++++----- setup.py.tpl | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4b09a7b..7f2a74c 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ - [![GitHub Repo stars](https://img.shields.io/github/stars/intelligent-machine-learning/dlrover?style=social)](https://github.com/intelligent-machine-learning/dlrover/stargazers) - [![Build](https://github.com/intelligent-machine-learning/dlrover/actions/workflows/main.yml/badge.svg)](https://github.com/intelligent-machine-learning/dlrover/actions/workflows/main.yml) + [![GitHub Repo stars](https://img.shields.io/github/stars/intelligent-machine-learning/atorch?style=social)](https://github.com/intelligent-machine-learning/atorch/stargazers) + [![Build](https://github.com/intelligent-machine-learning/atorch/actions/workflows/main.yml/badge.svg)](https://github.com/intelligent-machine-learning/atorch/actions/workflows/main.yml) [![PyPI Status Badge](https://badge.fury.io/py/atorch.svg)](https://pypi.org/project/atorch/) @@ -74,8 +74,8 @@ pip install atorch ``` # clone repository -git clone https://github.com/intelligent-machine-learning/dlrover.git -cd dlrover/atorch +git clone https://github.com/intelligent-machine-learning/atorch.git +cd atorch # build package, optional set version. bash dev/scripts/build.sh [version] # install the created package in dist directory. Note that if version is set, file name is different. 
@@ -90,7 +90,7 @@ pip install dist/atorch-0.1.0.dev0-py3-none-any.whl - To run [auto_accelerate examples](examples/auto_accelerate): ``` -cd dlrover/atorch/examples/auto_accelerate +cd atorch/examples/auto_accelerate # Single process train python train.py --model_type toy # Distributed train diff --git a/setup.py.tpl b/setup.py.tpl index bf67b40..52a245e 100644 --- a/setup.py.tpl +++ b/setup.py.tpl @@ -204,7 +204,7 @@ setup( " large-scale pretraining and finetuning of LLMs with over 100 billion parameters and" " thousands of advanced GPUs.", author="Ant Group", - url="https://github.com/intelligent-machine-learning/dlrover/tree/master/atorch", + url="https://github.com/intelligent-machine-learning/atorch", python_requires=">=3.8", packages=find_packages(exclude=["*test*", "benchmarks*"]), install_requires=required_deps, From 70e25f77b91cccab48f31c617df5896eeecef3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=99=E5=88=92?= Date: Wed, 22 Jan 2025 15:53:39 +0800 Subject: [PATCH 8/8] update action --- .github/actions/atorch-python-test/action.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/actions/atorch-python-test/action.yml b/.github/actions/atorch-python-test/action.yml index 975adec..eea3eb7 100644 --- a/.github/actions/atorch-python-test/action.yml +++ b/.github/actions/atorch-python-test/action.yml @@ -10,7 +10,4 @@ runs: - "pip install dlrover[torch]==0.4.0 --no-deps \ && echo -e 'import math\ninf = math.inf\nnan = math.nan\nstring_classes = \ (str, bytes)' > /opt/conda/lib/python3.8/site-packages/torch/_six.py \ -&& PYTHONPATH=. pytest atorch/tests/common_tests \ -&& cd .. \ -&& git config --global --add safe.directory /github/workspace \ -&& git clean -xdf" +&& PYTHONPATH=. pytest atorch/tests/common_tests"