Skip to content
Closed

Jira #90

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions app/alembic/versions/836a5f803c4d_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""status

Revision ID: 836a5f803c4d
Revises: 4d9562314bd3
Create Date: 2023-04-11 03:17:06.459499

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '836a5f803c4d'  # ID of this migration (matches "Revision ID" in the module docstring)
down_revision = '4d9562314bd3'  # the migration this one revises (matches "Revises" in the module docstring)
branch_labels = None  # no branch labels declared for this revision
depends_on = None  # no dependencies on other revisions declared


def upgrade() -> None:
    """Add the nullable boolean ``is_active`` column to the ``document`` table.

    The step is skipped when the column is already present, so running the
    migration against an already-patched database is a no-op. Any other
    failure (missing table, connection problem, ...) propagates instead of
    being printed and ignored, so the migration run fails visibly when the
    schema change did not actually happen.

    NOTE: the column check reflects the live database, so it needs a real
    connection (online migration mode).
    """
    inspector = sa.inspect(op.get_bind())
    existing_columns = {column['name'] for column in inspector.get_columns('document')}
    if 'is_active' in existing_columns:
        return

    with op.batch_alter_table('document', schema=None) as batch_op:
        batch_op.add_column(sa.Column('is_active', sa.Boolean(), nullable=True))


def downgrade() -> None:
    """Drop the ``is_active`` column from the ``document`` table.

    The step is skipped when the column is not present, so downgrading a
    database that never received the column is a no-op. Any other failure
    propagates instead of being printed and ignored, so the migration run
    fails visibly when the schema change did not actually happen.

    NOTE: the column check reflects the live database, so it needs a real
    connection (online migration mode).
    """
    inspector = sa.inspect(op.get_bind())
    existing_columns = {column['name'] for column in inspector.get_columns('document')}
    if 'is_active' not in existing_columns:
        return

    with op.batch_alter_table('document', schema=None) as batch_op:
        batch_op.drop_column('is_active')
7 changes: 4 additions & 3 deletions app/data_source/api/base_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from pydantic import BaseModel

from data_source.api.utils import get_utc_time_now
from db_engine import Session
from queues.task_queue import TaskQueue, Task
from schemas import DataSource
Expand Down Expand Up @@ -117,7 +118,7 @@ def _save_index_time_in_db(self) -> None:
"""
with Session() as session:
data_source: DataSource = session.query(DataSource).filter_by(id=self._data_source_id).first()
data_source.last_indexed_at = datetime.now()
data_source.last_indexed_at = get_utc_time_now()
session.commit()

def add_task_to_queue(self, function: Callable, **kwargs):
Expand All @@ -127,14 +128,14 @@ def add_task_to_queue(self, function: Callable, **kwargs):
TaskQueue.get_instance().add_task(task)

def run_task(self, function_name: str, **kwargs) -> None:
    """Record when the task started, then run the named method of this object.

    Args:
        function_name: Name of the attribute on ``self`` to call.
        **kwargs: Keyword arguments forwarded unchanged to that callable.

    Raises:
        AttributeError: If ``self`` has no attribute called ``function_name``.
    """
    # Store a single timezone-aware UTC timestamp; a naive datetime.now()
    # here could not be subtracted from an aware "now" later on.
    self._last_task_time = get_utc_time_now()
    task_function = getattr(self, function_name)
    task_function(**kwargs)

def index(self, force: bool = False) -> None:
if self._last_task_time is not None and not force:
# Don't index if the last task was less than an hour ago
time_since_last_task = datetime.now() - self._last_task_time
time_since_last_task = get_utc_time_now() - self._last_task_time
if time_since_last_task.total_seconds() < 60 * 60:
logging.info("Skipping indexing data source because it was indexed recently")
return
Expand Down
3 changes: 2 additions & 1 deletion app/data_source/api/basic_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ class BasicDocument:
author_image_url: str
location: str
url: str
status: str = None
is_active: bool = None
file_type: FileType = None
status: DocumentStatus = None
children: List['BasicDocument'] = None

@property
Expand Down
3 changes: 2 additions & 1 deletion app/data_source/api/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from data_source.api.base_data_source import BaseDataSource
from data_source.api.dynamic_loader import DynamicLoader, ClassInfo
from data_source.api.exception import KnownException
from data_source.api.utils import get_utc_time_now
from db_engine import Session
from schemas import DataSourceType, DataSource

Expand Down Expand Up @@ -59,7 +60,7 @@ def create_data_source(cls, name: str, config: dict) -> BaseDataSource:
data_source_class.validate_config(config)
config_str = json.dumps(config)

data_source_row = DataSource(type_id=data_source_type.id, config=config_str, created_at=datetime.now())
data_source_row = DataSource(type_id=data_source_type.id, config=config_str, created_at=get_utc_time_now())
session.add(data_source_row)
session.commit()

Expand Down
6 changes: 5 additions & 1 deletion app/data_source/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from functools import lru_cache
from io import BytesIO
from typing import Optional

from datetime import datetime, timezone
import requests

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -55,3 +55,7 @@ def get_confluence_user_image(image_url: str, token: str) -> Optional[str]:
return f"data:image/jpeg;base64,{base64.b64encode(image_bytes.getvalue()).decode()}"
except:
logger.warning(f"Failed to get confluence user image {image_url}")


def get_utc_time_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(timezone.utc)
4 changes: 4 additions & 0 deletions app/data_source/sources/confluence/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def get_config_fields() -> List[ConfigField]:
ConfigField(label="Personal Access Token", name="token", input_type=HTMLInputType.PASSWORD)
]

@classmethod
def get_display_name(cls) -> str:
    """Return the display name string for this data source type."""
    return "Confluence Self-Hosted"

@staticmethod
def list_spaces(confluence: Confluence, start=0) -> List[Location]:
# Usually the confluence connection fails, so we retry a few times
Expand Down
7 changes: 5 additions & 2 deletions app/data_source/sources/confluence/confluence_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def get_config_fields() -> List[ConfigField]:
ConfigField(label="Username", name="username", placeholder="example.user@email.com")
]

@classmethod
def get_display_name(cls) -> str:
    """Return the display name string for this data source type."""
    return "Confluence Cloud"

@staticmethod
def validate_config(config: Dict) -> None:
try:
Expand All @@ -35,9 +39,8 @@ def validate_config(config: Dict) -> None:
@staticmethod
def confluence_client_from_config(config: Dict) -> Confluence:
    """Build a Confluence Cloud client from a raw configuration dict.

    Args:
        config: Raw settings; parsed into a ``ConfluenceCloudConfig``, whose
            ``url``, ``username`` and ``token`` attributes are used.

    Returns:
        A ``Confluence`` client created with ``cloud=True`` that authenticates
        with the username and uses the token as the password. No
        ``verify_ssl`` override is passed, so the client library's own
        default applies.
    """
    parsed_config = ConfluenceCloudConfig(**config)
    return Confluence(url=parsed_config.url, username=parsed_config.username,
                      password=parsed_config.token, cloud=True)

@staticmethod
def list_locations(config: Dict) -> List[Location]:
Expand Down
5 changes: 4 additions & 1 deletion app/data_source/sources/gitlab/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def feed_issue(self, issue: Dict):
timestamp=datetime.strptime(raw_comment["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
))

status = gitlab_status_to_doc_status(issue["state"])
is_active = status == DocumentStatus.OPEN
doc = BasicDocument(
id=issue["id"],
data_source_id=self._data_source_id,
Expand All @@ -132,7 +134,8 @@ def feed_issue(self, issue: Dict):
location=issue['references']['full'].replace("/", " / "),
url=issue['web_url'],
timestamp=last_modified,
status=gitlab_status_to_doc_status(issue["state"]),
status=issue["state"],
is_active=is_active,
children=comments
)
IndexQueue.get_instance().put_single(doc=doc)
1 change: 0 additions & 1 deletion app/data_source/sources/google_drive/google_drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from googleapiclient.http import MediaIoBaseDownload
from httplib2 import Http
from oauth2client.service_account import ServiceAccountCredentials
from pydantic import BaseModel

from data_source.api.base_data_source import BaseDataSource, ConfigField, HTMLInputType, BaseDataSourceConfig
from data_source.api.basic_document import BasicDocument, DocumentType, FileType
Expand Down
Empty file.
151 changes: 151 additions & 0 deletions app/data_source/sources/jira/jira.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import logging
import os
from datetime import datetime
from typing import List, Dict

from atlassian import Jira
from atlassian.errors import ApiError

from data_source.api.base_data_source import BaseDataSource, ConfigField, HTMLInputType, Location, BaseDataSourceConfig
from data_source.api.basic_document import BasicDocument, DocumentType, DocumentStatus
from data_source.api.exception import InvalidDataSourceConfig
from queues.index_queue import IndexQueue


class JiraConfig(BaseDataSourceConfig):
    """Connection settings for a Jira data source that authenticates with a token."""
    url: str  # base URL of the Jira instance; also used to build issue links
    token: str  # access token handed to the Jira client


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class JiraDataSource(BaseDataSource):
    """Data source that feeds Jira issues and their comments to the index queue.

    The client authenticates with a URL plus access token (see ``JiraConfig``).
    Indexing fans out through the task queue: one task per project, then one
    task per issue.
    """

    @classmethod
    def get_display_name(cls) -> str:
        """Return the display name string for this data source type."""
        return "Jira Self-Hosted"

    @staticmethod
    def get_config_fields() -> List[ConfigField]:
        """Return the configuration fields: the Jira URL and the access token."""
        return [
            ConfigField(label="Jira URL", name="url", placeholder="https://example.jira.com"),
            ConfigField(label="Personal Access Token", name="token", input_type=HTMLInputType.PASSWORD)
        ]

    @staticmethod
    def list_projects(jira: Jira) -> List[Location]:
        """Return every project visible to ``jira`` as a ``Location``.

        The project name becomes the label and the project key the value.
        """
        logger.info('Listing projects')
        projects = jira.get_all_projects()
        return [Location(label=project['name'], value=project['key']) for project in projects]

    @staticmethod
    def list_locations(config: Dict) -> List[Location]:
        """Build a client from ``config`` and list its projects as locations."""
        jira = JiraDataSource.client_from_config(config)
        return JiraDataSource.list_projects(jira=jira)

    @staticmethod
    def client_from_config(config: Dict) -> Jira:
        """Build a token-authenticated Jira client from a raw configuration dict.

        NOTE: certificate verification is turned on only when the
        ``JIRA_VERIFY_SSL`` environment variable is set (to any value, even an
        empty string); when it is unset the client is created with
        ``verify_ssl=False``.
        """
        parsed_config = JiraConfig(**config)
        should_verify_ssl = os.environ.get('JIRA_VERIFY_SSL') is not None
        return Jira(url=parsed_config.url, token=parsed_config.token, verify_ssl=should_verify_ssl)

    @staticmethod
    def has_prerequisites() -> bool:
        """Always return ``True``."""
        return True

    @staticmethod
    def validate_config(config: Dict) -> None:
        """Check that ``config`` yields a client that can talk to Jira.

        Raises:
            InvalidDataSourceConfig: If the config cannot be parsed, the client
                cannot be built, or the probe request fails. The original
                error is chained as the cause.
        """
        try:
            jira = JiraDataSource.client_from_config(config)
            jira.get_all_priorities()
        except Exception as e:
            # Malformed configs and connection/HTTP errors are configuration
            # problems too, not only ApiError; this mirrors the behavior of
            # JiraCloudDataSource.validate_config.
            raise InvalidDataSourceConfig from e

    def __init__(self, *args, **kwargs):
        """Initialise the base data source and create the Jira client."""
        super().__init__(*args, **kwargs)
        # Resolve through the instance so a subclass's own client_from_config
        # is used, rather than always building the token-based client.
        self._jira = self.client_from_config(self._raw_config)

    def _feed_new_documents(self) -> None:
        """Queue one indexing task per project.

        Uses the configured ``locations_to_index`` when that is non-empty,
        otherwise every project the client can list.
        """
        logger.info('Feeding new documents with Jira')
        projects = self._config.locations_to_index or JiraDataSource.list_projects(jira=self._jira)
        for project in projects:
            self.add_task_to_queue(self._feed_project_issues, project=project)

    def _feed_project_issues(self, project: Location):
        """Page through a project's recently updated issues and queue each one.

        Only issues updated at or after ``self._last_index_time`` (formatted
        to minute resolution for the JQL query) are requested, newest first.
        """
        logger.info(f'Getting issues from project {project.label} ({project.value})')

        start = 0
        limit = 100
        last_index_time = self._last_index_time.strftime("%Y-%m-%d %H:%M")
        jql_query = f'project = "{project.value}" AND updated >= "{last_index_time}" ORDER BY updated DESC'
        while True:
            new_batch = self._jira.jql_get_list_of_tickets(jql_query, start=start, limit=limit, validate_query=True)
            len_new_batch = len(new_batch)
            logger.info(f'Got {len_new_batch} issues from project {project.label} (total {start + len_new_batch})')
            for raw_issue in new_batch:
                self.add_task_to_queue(self._feed_issue, raw_issue=raw_issue, project_name=project.label)

            # A batch shorter than the page size means this was the last page.
            if len_new_batch < limit:
                break

            start += limit

    def _feed_issue(self, raw_issue: Dict, project_name: str):
        """Turn one raw Jira issue and its comments into a document and enqueue it.

        Args:
            raw_issue: Issue dict as returned by the Jira client; the keys
                ``id``, ``key`` and ``fields`` are read.
            project_name: Label of the owning project, stored as the
                document's location.
        """
        fields = raw_issue['fields']
        issue_id = raw_issue['id']
        last_modified = datetime.strptime(fields['updated'], "%Y-%m-%dT%H:%M:%S.%f%z")
        # Strip a trailing slash so a configured URL such as "https://jira/"
        # does not produce "https://jira//browse/KEY".
        issue_url = self._raw_config['url'].rstrip('/') + '/browse/' + raw_issue['key']

        raw_comments = self._jira.issue_get_comments(issue_id)
        comments = [
            BasicDocument(
                id=raw_comment["id"],
                data_source_id=self._data_source_id,
                type=DocumentType.COMMENT,
                title=raw_comment["author"]["displayName"],
                content=raw_comment["body"],
                author=raw_comment["author"]["displayName"],
                author_image_url=raw_comment["author"]["avatarUrls"]["48x48"],
                location=raw_issue['key'],
                url=issue_url,
                timestamp=datetime.strptime(raw_comment["updated"], "%Y-%m-%dT%H:%M:%S.%f%z")
            )
            for raw_comment in raw_comments['comments']
        ]

        # Author preference: assignee first, then reporter, then creator.
        author = fields.get('assignee') or fields.get('reporter') or fields.get('creator')
        if author:
            author_name = author['displayName']
            author_image_url = author['avatarUrls']['48x48']
        else:
            author_name = 'Unknown'
            author_image_url = ""

        # An issue's description may be null/absent; index an empty string
        # rather than passing None on as document content.
        content = fields.get('description') or ''
        title = fields['summary']
        doc = BasicDocument(title=title,
                            content=content,
                            author=author_name,
                            author_image_url=author_image_url,
                            timestamp=last_modified,
                            id=issue_id,
                            data_source_id=self._data_source_id,
                            location=project_name,
                            url=issue_url,
                            status=fields['status']['name'],
                            type=DocumentType.ISSUE,
                            children=comments)
        IndexQueue.get_instance().put_single(doc=doc)


# if __name__ == '__main__':
# import os
# ds = JiraDataSource(config={"url": os.getenv('JIRA_URL'), "token": os.getenv('JIRA_TOKEN')}, data_source_id=5)
# projects = ds.list_projects(ds._jira)
# for project in projects:
# ds._feed_project_issues(project=project)
51 changes: 51 additions & 0 deletions app/data_source/sources/jira/jira_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from typing import List, Dict

from atlassian import Jira

from data_source.api.base_data_source import ConfigField, HTMLInputType, Location, BaseDataSourceConfig
from data_source.api.exception import InvalidDataSourceConfig
from data_source.sources.jira.jira import JiraDataSource


class JiraCloudConfig(BaseDataSourceConfig):
    """Connection settings for a Jira Cloud data source (username + token auth)."""
    url: str  # base URL of the Jira Cloud site
    token: str  # API token; passed to the Jira client as the password
    username: str  # account username passed to the Jira client


class JiraCloudDataSource(JiraDataSource):
    """Jira data source variant whose client is created with ``cloud=True``
    and authenticates with a username plus API token."""

    @staticmethod
    def get_config_fields() -> List[ConfigField]:
        """Return the configuration fields: URL, API token and username."""
        url_field = ConfigField(label="Jira Cloud URL", name="url", placeholder="https://example.jira.com")
        token_field = ConfigField(label="Personal API Token", name="token", input_type=HTMLInputType.PASSWORD)
        username_field = ConfigField(label="Username", name="username", placeholder="example.user@email.com")
        return [url_field, token_field, username_field]

    @staticmethod
    def validate_config(config: Dict) -> None:
        """Verify ``config`` by building a client and listing its projects.

        Raises:
            InvalidDataSourceConfig: On any failure, with the original error
                chained as the cause.
        """
        try:
            cloud_client = JiraCloudDataSource.client_from_config(config)
            JiraCloudDataSource.list_projects(jira=cloud_client)
        except Exception as error:
            raise InvalidDataSourceConfig from error

    @classmethod
    def get_display_name(cls) -> str:
        """Return the display name string for this data source type."""
        return "Jira Cloud"

    @staticmethod
    def client_from_config(config: Dict) -> Jira:
        """Build a Jira Cloud client from a raw configuration dict.

        The token is supplied as the password alongside the username.
        """
        cloud_config = JiraCloudConfig(**config)
        return Jira(
            url=cloud_config.url,
            username=cloud_config.username,
            password=cloud_config.token,
            cloud=True,
        )

    @staticmethod
    def list_locations(config: Dict) -> List[Location]:
        """Build a cloud client from ``config`` and list its projects as locations."""
        cloud_client = JiraCloudDataSource.client_from_config(config)
        return JiraDataSource.list_projects(jira=cloud_client)

    def __init__(self, *args, **kwargs):
        """Initialise the parent data source, then install the cloud client."""
        super().__init__(*args, **kwargs)
        # Replace whatever client the parent initialiser created with one
        # built from the cloud (username + token) configuration.
        self._jira = JiraCloudDataSource.client_from_config(self._raw_config)
3 changes: 2 additions & 1 deletion app/indexing/index_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def basic_to_document(document: BasicDocument, parent: Document = None) -> Docum
id_in_data_source=document.id_in_data_source,
type=document.type.value,
file_type=get_enum_value_or_none(document.file_type),
status=get_enum_value_or_none(document.status),
status=document.status,
is_active=document.is_active,
title=document.title,
author=document.author,
author_image_url=document.author_image_url,
Expand Down
Loading