fix github clone to folder gitclone
This commit is contained in:
parent
8fc65ddae8
commit
1f1c183884
1
gitclone/enterprise
Submodule
1
gitclone/enterprise
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 38276c9a237a1779ce7a3e6f31102ff615691690
|
||||||
1
gitclone/odoo
Submodule
1
gitclone/odoo
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 8678e1c777b1faec23d4e7fd311c8ec96041f5b7
|
||||||
@ -133,8 +133,11 @@ class DatasetProcessor:
|
|||||||
Returns:
|
Returns:
|
||||||
List of code samples with metadata
|
List of code samples with metadata
|
||||||
"""
|
"""
|
||||||
temp_dir = tempfile.mkdtemp()
|
# Create a persistent directory for cloned repositories
|
||||||
self.temp_dirs.append(temp_dir)
|
gitclone_dir = Path("./gitclone")
|
||||||
|
gitclone_dir.mkdir(exist_ok=True)
|
||||||
|
temp_dir = str(gitclone_dir)
|
||||||
|
# Note: We don't add this to temp_dirs since we want to keep it
|
||||||
|
|
||||||
depth = 1
|
depth = 1
|
||||||
branch = "18.0"
|
branch = "18.0"
|
||||||
@ -143,31 +146,31 @@ class DatasetProcessor:
|
|||||||
# Clone repository
|
# Clone repository
|
||||||
repo_name = repo_url.split('/')[-1].replace('.git', '')
|
repo_name = repo_url.split('/')[-1].replace('.git', '')
|
||||||
repo_path = os.path.join(temp_dir, repo_name)
|
repo_path = os.path.join(temp_dir, repo_name)
|
||||||
|
if not os.path.exists(repo_path):
|
||||||
|
self.logger.info(f"Cloning {repo_url} to {repo_path}")
|
||||||
|
|
||||||
self.logger.info(f"Cloning {repo_url} to {repo_path}")
|
# Use token for private repositories if provided
|
||||||
|
clone_url = repo_url
|
||||||
# Use token for private repositories if provided
|
if github_token and "github.com" in repo_url:
|
||||||
clone_url = repo_url
|
# Handle SSH URLs
|
||||||
if github_token and "github.com" in repo_url:
|
if repo_url.startswith("git@"):
|
||||||
# Handle SSH URLs
|
# SSH URL doesn't need token modification
|
||||||
if repo_url.startswith("git@"):
|
pass
|
||||||
# SSH URL doesn't need token modification
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
# Add token to HTTPS URL
|
|
||||||
if repo_url.startswith("https://"):
|
|
||||||
clone_url = repo_url.replace("https://", f"https://{github_token}@")
|
|
||||||
elif repo_url.startswith("http://"):
|
|
||||||
clone_url = repo_url.replace("http://", f"http://{github_token}@")
|
|
||||||
else:
|
else:
|
||||||
# For URLs like "github.com/user/repo" or "user/repo"
|
# Add token to HTTPS URL
|
||||||
if repo_url.startswith("github.com/"):
|
if repo_url.startswith("https://"):
|
||||||
clone_url = f"https://{github_token}@{repo_url}"
|
clone_url = repo_url.replace("https://", f"https://{github_token}@")
|
||||||
|
elif repo_url.startswith("http://"):
|
||||||
|
clone_url = repo_url.replace("http://", f"http://{github_token}@")
|
||||||
else:
|
else:
|
||||||
# Assume it's a GitHub path like "user/repo"
|
# For URLs like "github.com/user/repo" or "user/repo"
|
||||||
clone_url = f"https://{github_token}@github.com/{repo_url}"
|
if repo_url.startswith("github.com/"):
|
||||||
|
clone_url = f"https://{github_token}@{repo_url}"
|
||||||
repo = git.Repo.clone_from(clone_url, repo_path, depth=depth, branch=branch)
|
else:
|
||||||
|
# Assume it's a GitHub path like "user/repo"
|
||||||
|
clone_url = f"https://{github_token}@github.com/{repo_url}"
|
||||||
|
|
||||||
|
repo = git.Repo.clone_from(clone_url, repo_path, depth=depth, branch=branch)
|
||||||
|
|
||||||
# Extract code samples
|
# Extract code samples
|
||||||
code_samples = self._extract_code_samples(repo_path, config)
|
code_samples = self._extract_code_samples(repo_path, config)
|
||||||
@ -175,8 +178,9 @@ class DatasetProcessor:
|
|||||||
return code_samples
|
return code_samples
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Cleanup
|
# Cleanup temporary directories, but keep gitclone folder
|
||||||
shutil.rmtree(temp_dir, ignore_errors=True)
|
if temp_dir != "./gitclone":
|
||||||
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
||||||
|
|
||||||
def _extract_code_samples(self, repo_path: str, config: AppConfig) -> List[Dict]:
|
def _extract_code_samples(self, repo_path: str, config: AppConfig) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user